fix: mapping of image coordinates to pdf coordinates (table inference)
This commit is contained in:
parent
b854312b08
commit
3b8d6eda04
2
.gitignore
vendored
2
.gitignore
vendored
@ -49,4 +49,4 @@ __pycache__/
|
||||
!drivers
|
||||
|
||||
# unignore files
|
||||
!bom.*
|
||||
!bom.*
|
||||
|
||||
@ -13,9 +13,7 @@ logger.add(sys.stdout, level="INFO")
|
||||
def bashcmd(cmds: list) -> str:
|
||||
try:
|
||||
logger.debug(f"running: {' '.join(cmds)}")
|
||||
return subprocess.run(
|
||||
cmds, check=True, capture_output=True, text=True
|
||||
).stdout.strip("\n")
|
||||
return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
|
||||
except:
|
||||
logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
|
||||
raise
|
||||
|
||||
BIN
data/2017-1078223.pdf
Normal file
BIN
data/2017-1078223.pdf
Normal file
Binary file not shown.
BIN
data/2017-1078223.vlp_output.annotated.pdf
Normal file
BIN
data/2017-1078223.vlp_output.annotated.pdf
Normal file
Binary file not shown.
98825
data/2017-1078223.vlp_output.json
Normal file
98825
data/2017-1078223.vlp_output.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
data/table_inference_test_files.zip
Normal file
BIN
data/table_inference_test_files.zip
Normal file
Binary file not shown.
8
flake.lock
generated
8
flake.lock
generated
@ -20,17 +20,17 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1711703276,
|
||||
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
|
||||
"lastModified": 1715155958,
|
||||
"narHash": "sha256-I/V8oiPfK0KIQUc+3sAQLJJYa7L3edd9gdnKP2XvT7E=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
|
||||
"rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
description = "An flake to use a Python poetry project in an FHS environment when poetry2nix is uncooperative";
|
||||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/240b1d794bbfca3522dec880a3aa300932bbfd98";
|
||||
};
|
||||
outputs = {
|
||||
self,
|
||||
|
||||
@ -26,4 +26,5 @@ best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))
|
||||
|
||||
# print(best_result)
|
||||
|
||||
annotate_pdf(pdf_bytes, best_result, output_path=args.output)
|
||||
if __name__ == "__main__":
|
||||
annotate_pdf(pdf_bytes, best_result, output_path=args.output)
|
||||
|
||||
@ -27,7 +27,6 @@ def annotate_page(page: fitz.Page, prediction):
|
||||
bbox = itemgetter("x1", "y1", "x2", "y2")(box["box"])
|
||||
label, probability, uuid = itemgetter("label", "probability", "uuid")(box)
|
||||
|
||||
bbox = mirror_on_x_axis(bbox, page.bound().height)
|
||||
x0, y0, x1, y1 = bbox
|
||||
page.draw_rect(fitz.Rect(x0, y0, x1, y1), color=(0, 0, 1), width=2)
|
||||
label_x, label_y = x0, y0 - 5
|
||||
@ -44,14 +43,6 @@ def annotate_page(page: fitz.Page, prediction):
|
||||
return page
|
||||
|
||||
|
||||
def mirror_on_x_axis(bbox, page_height):
    """Flip a box top-to-bottom within a page of height ``page_height``.

    ``bbox`` is unpacked as ``(x0, y0, x1, y1)``. The x values are returned
    untouched; each y value is replaced by its distance from ``page_height``,
    and the two are swapped so the first y is still derived from the
    original second one.
    """
    left, top, right, bottom = bbox
    return left, page_height - bottom, right, page_height - top
|
||||
|
||||
|
||||
@singledispatch
def provide_byte_stream(pdf: Union[bytes, Path, str]) -> None:
    """Generic base of the single-dispatch ``provide_byte_stream`` function.

    This base implementation does nothing and returns ``None``.
    NOTE(review): type-specific implementations are presumably registered
    elsewhere in the module -- not visible in this excerpt.
    """
|
||||
|
||||
@ -3,10 +3,10 @@ import os
|
||||
import cv2
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
if os.environ.get("USER") == "isaac":
|
||||
import matplotlib
|
||||
# if os.environ.get("USER") == "isaac":
|
||||
# import matplotlib
|
||||
|
||||
matplotlib.use("module://matplotlib-backend-wezterm")
|
||||
# matplotlib.use("module://matplotlib-backend-wezterm")
|
||||
|
||||
|
||||
def show_image_cv2(image, maxdim=700):
|
||||
|
||||
@ -2,7 +2,7 @@ import json
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
from operator import itemgetter
|
||||
from typing import SupportsIndex, Tuple
|
||||
from typing import Literal, SupportsIndex, Tuple
|
||||
|
||||
import fitz # type: ignore
|
||||
import numpy as np
|
||||
@ -54,6 +54,38 @@ def rescale_to_pdf(bbox: BBoxType, page_info: PageInfo) -> tuple[float, float, f
|
||||
return round3((bbox[0] * ratio_w, bbox[1] * ratio_h, bbox[2] * ratio_w, bbox[3] * ratio_h))
|
||||
|
||||
|
||||
def derotate_image(bbox: tuple[float, float, float, float], page_info: PageInfo) -> ...:
|
||||
def mirror_horizontal(bbox, page_height):
|
||||
x0, y0, x1, y1 = bbox
|
||||
y0_new = page_height - y1
|
||||
y1_new = page_height - y0
|
||||
|
||||
return x0, y0_new, x1, y1_new
|
||||
|
||||
def mirror_vertical(bbox, page_width):
|
||||
x0, y0, x1, y1 = bbox
|
||||
x0_new = page_width - x1
|
||||
x1_new = page_width - x0
|
||||
|
||||
return x0_new, y0, x1_new, y1
|
||||
|
||||
logger.debug(f"{page_info.rotation=}")
|
||||
match page_info.rotation:
|
||||
case 0:
|
||||
bbox = mirror_horizontal(bbox, page_info.height)
|
||||
case 90:
|
||||
pass
|
||||
case 180:
|
||||
bbox = mirror_vertical(bbox, page_info.height)
|
||||
case 270:
|
||||
bbox = mirror_vertical(mirror_horizontal(bbox, page_info.height), page_info.height)
|
||||
case _:
|
||||
logger.warning(f"Unknown rotation: {page_info.rotation}")
|
||||
pass
|
||||
|
||||
return bbox
|
||||
|
||||
|
||||
def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info: PageInfo) -> dict:
|
||||
|
||||
transform = partial(rescale_to_pdf, page_info=page_info)
|
||||
@ -67,9 +99,11 @@ def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info:
|
||||
|
||||
return (x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y)
|
||||
|
||||
derotate = partial(derotate_image, page_info=page_info)
|
||||
|
||||
unpack = itemgetter("x1", "y1", "x2", "y2")
|
||||
pack = lambda x: {"x1": x[0], "y1": x[1], "x2": x[2], "y2": x[3]}
|
||||
convert = compose(pack, apply_offsets, transform, unpack)
|
||||
convert = compose(pack, apply_offsets, derotate, transform, unpack)
|
||||
|
||||
table_lines = bboxes.get("tableLines", [])
|
||||
bboxes["tableLines"] = list(map(convert, table_lines))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user