fix: mapping of image coordinates to pdf coordinates (table inference)

2024-05-15 11:48:31 +02:00 · 2024-05-15 11:48:31 +02:00 · 3b8d6eda04
commit 3b8d6eda04
parent b854312b08
12 changed files with 98873 additions and 24 deletions
--- a/.gitignore
+++ b/.gitignore
@ -49,4 +49,4 @@ __pycache__/
 !drivers

 # unignore files
-!bom.*
+!bom.*
--- a/.hooks/poetry_version_check.py
+++ b/.hooks/poetry_version_check.py
@ -13,9 +13,7 @@ logger.add(sys.stdout, level="INFO")
 def bashcmd(cmds: list) -> str:
    try:
        logger.debug(f"running: {' '.join(cmds)}")
-        return subprocess.run(
-            cmds, check=True, capture_output=True, text=True
-        ).stdout.strip("\n")
+        return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
    except:
        logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
        raise
--- a/data/2017-1078223.pdf
+++ b/data/2017-1078223.pdf
--- a/data/2017-1078223.vlp_output.annotated.pdf
+++ b/data/2017-1078223.vlp_output.annotated.pdf
--- a/data/2017-1078223.vlp_output.json
+++ b/data/2017-1078223.vlp_output.json
--- a/data/table_inference_test_files.zip
+++ b/data/table_inference_test_files.zip
--- a/flake.lock
+++ b/flake.lock
@ -20,17 +20,17 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1711703276,
-        "narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
+        "lastModified": 1715155958,
+        "narHash": "sha256-I/V8oiPfK0KIQUc+3sAQLJJYa7L3edd9gdnKP2XvT7E=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
+        "rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
-        "ref": "nixos-unstable",
        "repo": "nixpkgs",
+        "rev": "240b1d794bbfca3522dec880a3aa300932bbfd98",
        "type": "github"
      }
    },
--- a/flake.nix
+++ b/flake.nix
@ -2,7 +2,7 @@
  description = "An flake to use a Python poetry project in an FHS environment when poetry2nix is uncooperative";
  inputs = {
    flake-utils.url = "github:numtide/flake-utils";
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    nixpkgs.url = "github:NixOS/nixpkgs/240b1d794bbfca3522dec880a3aa300932bbfd98";
  };
  outputs = {
    self,
--- a/scripts/parse_pdf.py
+++ b/scripts/parse_pdf.py
@ -26,4 +26,5 @@ best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))

 # print(best_result)

-annotate_pdf(pdf_bytes, best_result, output_path=args.output)
+if __name__ == "__main__":
+    annotate_pdf(pdf_bytes, best_result, output_path=args.output)
--- a/src/cv_analysis/utils/annotate.py
+++ b/src/cv_analysis/utils/annotate.py
@ -27,7 +27,6 @@ def annotate_page(page: fitz.Page, prediction):
        bbox = itemgetter("x1", "y1", "x2", "y2")(box["box"])
        label, probability, uuid = itemgetter("label", "probability", "uuid")(box)

-        bbox = mirror_on_x_axis(bbox, page.bound().height)
        x0, y0, x1, y1 = bbox
        page.draw_rect(fitz.Rect(x0, y0, x1, y1), color=(0, 0, 1), width=2)
        label_x, label_y = x0, y0 - 5
@ -44,14 +43,6 @@ def annotate_page(page: fitz.Page, prediction):
    return page


-def mirror_on_x_axis(bbox, page_height):
-    x0, y0, x1, y1 = bbox
-    y0_new = page_height - y1
-    y1_new = page_height - y0
-
-    return x0, y0_new, x1, y1_new
-
-
@singledispatch
 def provide_byte_stream(pdf: Union[bytes, Path, str]) -> None:
    pass
--- a/src/cv_analysis/utils/display.py
+++ b/src/cv_analysis/utils/display.py
@ -3,10 +3,10 @@ import os
 import cv2
 from matplotlib import pyplot as plt

-if os.environ.get("USER") == "isaac":
-    import matplotlib
+# if os.environ.get("USER") == "isaac":
+#     import matplotlib

-    matplotlib.use("module://matplotlib-backend-wezterm")
+#     matplotlib.use("module://matplotlib-backend-wezterm")


 def show_image_cv2(image, maxdim=700):
--- a/src/cv_analysis/utils/image_extraction.py
+++ b/src/cv_analysis/utils/image_extraction.py
@ -2,7 +2,7 @@ import json
 from dataclasses import dataclass
 from functools import partial
 from operator import itemgetter
-from typing import SupportsIndex, Tuple
+from typing import Literal, SupportsIndex, Tuple

 import fitz  # type: ignore
 import numpy as np
@ -54,6 +54,38 @@ def rescale_to_pdf(bbox: BBoxType, page_info: PageInfo) -> tuple[float, float, f
    return round3((bbox[0] * ratio_w, bbox[1] * ratio_h, bbox[2] * ratio_w, bbox[3] * ratio_h))


+# Mirror a bounding box (x0, y0, x1, y1) along one or both axes, chosen by
+# page_info.rotation, and return the resulting 4-tuple. Rotations other than
+# 0, 90, 180 and 270 are logged as a warning and the box is returned unchanged.
+# Presumably this is the image -> PDF coordinate step named in the commit
+# subject -- confirm against the caller.
+# NOTE(review): the return annotation `...` is a placeholder; every path
+# returns a 4-element tuple.
+def derotate_image(bbox: tuple[float, float, float, float], page_info: PageInfo) -> ...:
+    # Flip the y-range: (y0, y1) becomes (page_height - y1, page_height - y0);
+    # the x values are passed through untouched.
+    def mirror_horizontal(bbox, page_height):
+        x0, y0, x1, y1 = bbox
+        y0_new = page_height - y1
+        y1_new = page_height - y0
+
+        return x0, y0_new, x1, y1_new
+
+    # Flip the x-range: (x0, x1) becomes (page_width - x1, page_width - x0);
+    # the y values are passed through untouched.
+    def mirror_vertical(bbox, page_width):
+        x0, y0, x1, y1 = bbox
+        x0_new = page_width - x1
+        x1_new = page_width - x0
+
+        return x0_new, y0, x1_new, y1
+
+    logger.debug(f"{page_info.rotation=}")
+    match page_info.rotation:
+        case 0:
+            # Only the y axis is mirrored, against the page height.
+            bbox = mirror_horizontal(bbox, page_info.height)
+        case 90:
+            # The box is left exactly as it came in.
+            pass
+        case 180:
+            # NOTE(review): mirror_vertical's parameter is named page_width, but
+            # page_info.height is passed here -- confirm whether the page width
+            # was intended (the two only coincide on square pages).
+            bbox = mirror_vertical(bbox, page_info.height)
+        case 270:
+            # Both axes are mirrored, each against page_info.height.
+            # NOTE(review): same height-for-width question as in the 180 case.
+            bbox = mirror_vertical(mirror_horizontal(bbox, page_info.height), page_info.height)
+        case _:
+            # Unsupported rotation value: warn and fall through with bbox unchanged.
+            logger.warning(f"Unknown rotation: {page_info.rotation}")
+            pass
+
+    return bbox
+
+
 def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info: PageInfo) -> dict:

    transform = partial(rescale_to_pdf, page_info=page_info)
@ -67,9 +99,11 @@ def transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info:

        return (x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y)

+    derotate = partial(derotate_image, page_info=page_info)
+
    unpack = itemgetter("x1", "y1", "x2", "y2")
    pack = lambda x: {"x1": x[0], "y1": x[1], "x2": x[2], "y2": x[3]}
-    convert = compose(pack, apply_offsets, transform, unpack)
+    convert = compose(pack, apply_offsets, derotate, transform, unpack)

    table_lines = bboxes.get("tableLines", [])
    bboxes["tableLines"] = list(map(convert, table_lines))