fix: mapping of image coordinates to PDF coordinates (table inference)

This commit is contained in:
iriley 2024-05-15 11:48:31 +02:00
parent b854312b08
commit 3b8d6eda04
12 changed files with 98873 additions and 24 deletions

2
.gitignore vendored
View File

@ -49,4 +49,4 @@ __pycache__/
!drivers
# unignore files
!bom.*
!bom.*

View File

@ -13,9 +13,7 @@ logger.add(sys.stdout, level="INFO")
def bashcmd(cmds: list) -> str:
try:
logger.debug(f"running: {' '.join(cmds)}")
return subprocess.run(
cmds, check=True, capture_output=True, text=True
).stdout.strip("\n")
return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
except:
logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
raise

BIN
data/2017-1078223.pdf Normal file

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

8
flake.lock generated
View File

@ -20,17 +20,17 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1711703276,
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
"lastModified": 1715155958,
"narHash": "sha256-I/V8oiPfK0KIQUc+3sAQLJJYa7L3edd9gdnKP2XvT7E=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
"rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
"type": "github"
}
},

View File

@ -2,7 +2,7 @@
description = "An flake to use a Python poetry project in an FHS environment when poetry2nix is uncooperative";
inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
nixpkgs.url = "github:NixOS/nixpkgs/240b1d794bbfca3522dec880a3aa300932bbfd98";
};
outputs = {
self,

View File

@ -26,4 +26,5 @@ best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))
# print(best_result)
annotate_pdf(pdf_bytes, best_result, output_path=args.output)
if __name__ == "__main__":
annotate_pdf(pdf_bytes, best_result, output_path=args.output)

View File

@ -27,7 +27,6 @@ def annotate_page(page: fitz.Page, prediction):
bbox = itemgetter("x1", "y1", "x2", "y2")(box["box"])
label, probability, uuid = itemgetter("label", "probability", "uuid")(box)
bbox = mirror_on_x_axis(bbox, page.bound().height)
x0, y0, x1, y1 = bbox
page.draw_rect(fitz.Rect(x0, y0, x1, y1), color=(0, 0, 1), width=2)
label_x, label_y = x0, y0 - 5
@ -44,14 +43,6 @@ def annotate_page(page: fitz.Page, prediction):
return page
def mirror_on_x_axis(bbox, page_height):
x0, y0, x1, y1 = bbox
y0_new = page_height - y1
y1_new = page_height - y0
return x0, y0_new, x1, y1_new
@singledispatch
def provide_byte_stream(pdf: Union[bytes, Path, str]) -> None:
pass

View File

@ -3,10 +3,10 @@ import os
import cv2
from matplotlib import pyplot as plt
if os.environ.get("USER") == "isaac":
import matplotlib
# if os.environ.get("USER") == "isaac":
# import matplotlib
matplotlib.use("module://matplotlib-backend-wezterm")
# matplotlib.use("module://matplotlib-backend-wezterm")
def show_image_cv2(image, maxdim=700):

View File

@ -2,7 +2,7 @@ import json
from dataclasses import dataclass
from functools import partial
from operator import itemgetter
from typing import SupportsIndex, Tuple
from typing import Literal, SupportsIndex, Tuple
import fitz # type: ignore
import numpy as np
@ -54,6 +54,38 @@ def rescale_to_pdf(bbox: BBoxType, page_info: PageInfo) -> tuple[float, float, f
return round3((bbox[0] * ratio_w, bbox[1] * ratio_h, bbox[2] * ratio_w, bbox[3] * ratio_h))
def derotate_image(bbox: tuple[float, float, float, float], page_info: PageInfo) -> ...:
    """Mirror *bbox* depending on ``page_info.rotation``.

    The box is a 4-tuple ``(x0, y0, x1, y1)``.  Depending on the rotation
    value it is reflected along the y direction, the x direction, both, or
    left untouched:

    * ``0``   -> y-reflection
    * ``90``  -> unchanged
    * ``180`` -> x-reflection
    * ``270`` -> y-reflection followed by x-reflection
    * anything else -> a warning is logged and the box is returned unchanged

    NOTE(review): every reflection, including the x-reflections, uses
    ``page_info.height`` as its extent.  Whether the x-reflections should use
    the page width instead cannot be decided from this function alone --
    confirm against ``PageInfo`` and the callers.
    """

    def reflect_y(box, extent):
        # Swap and subtract the y pair so the resulting pair keeps its ordering.
        ax, ay, bx, by = box
        return ax, extent - by, bx, extent - ay

    def reflect_x(box, extent):
        # Same as reflect_y, applied to the x pair.
        ax, ay, bx, by = box
        return extent - bx, ay, extent - ax, by

    logger.debug(f"{page_info.rotation=}")

    rotation = page_info.rotation
    if rotation == 0:
        return reflect_y(bbox, page_info.height)
    if rotation == 90:
        return bbox
    if rotation == 180:
        return reflect_x(bbox, page_info.height)
    if rotation == 270:
        return reflect_x(reflect_y(bbox, page_info.height), page_info.height)

    logger.warning(f"Unknown rotation: {page_info.rotation}")
    return bbox
def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info: PageInfo) -> dict:
transform = partial(rescale_to_pdf, page_info=page_info)
@ -67,9 +99,11 @@ def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info:
return (x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y)
derotate = partial(derotate_image, page_info=page_info)
unpack = itemgetter("x1", "y1", "x2", "y2")
pack = lambda x: {"x1": x[0], "y1": x[1], "x2": x[2], "y2": x[3]}
convert = compose(pack, apply_offsets, transform, unpack)
convert = compose(pack, apply_offsets, derotate, transform, unpack)
table_lines = bboxes.get("tableLines", [])
bboxes["tableLines"] = list(map(convert, table_lines))