diff --git a/README.md b/README.md index bcc4793..9584d3f 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,57 @@ This repository implements computer vision based approaches for detecting and parsing visual features such as tables or previous redactions in documents. +## API + +Input message: + +```json +{ + "targetFilePath": { + "pdf": "absolute file path", + "vlp_output": "absolute file path" + }, + "responseFilePath": "absolute file path", + "operation": "table_image_inference" +} +``` + +The response is uploaded to storage at the path specified in the `responseFilePath` field. It has the following structure: + +```json +{ + ..., + "data": [ + { + "pageNum": 0, + "bbox": { + "x1": 55.3407, + "y1": 247.0246, + "x2": 558.5602, + "y2": 598.0585 + }, + "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84", + "label": "table", + "tableLines": [ + { + "x1": 0, + "y1": 16, + "x2": 1399, + "y2": 16 + }, + ... + ], + "imageInfo": { + "height": 693, + "width": 1414 + } + }, + ... + ] +} + +``` + ## Installation ```bash @@ -31,10 +82,9 @@ The below snippet shows hot to find the outlines of previous redactions. ```python from cv_analysis.redaction_detection import find_redactions -import pdf2image +import pdf2image import numpy as np - pdf_path = ... page_index = ... 
diff --git a/test/unit_tests/table_inference_test.py b/test/unit_tests/table_inference_test.py
index 5ee337e..6c2c4be 100644
--- a/test/unit_tests/table_inference_test.py
+++ b/test/unit_tests/table_inference_test.py
@@ -1,12 +1,30 @@
 from cv_analysis.server.pipeline import make_image_analysis_pipeline
 from cv_analysis.table_inference import infer_lines
 
-def test_table_inference():
+
+def test_table_inference_smoke():
+    """Smoke-test table line inference through the image analysis pipeline.
+
+    Runs the pipeline built around ``infer_lines`` on test/test_data/article.pdf
+    with one mocked VLP "table" box and checks that more than one table line
+    comes back, each keyed by exactly x1, x2, y1 and y2.
+    """
     pl = make_image_analysis_pipeline(infer_lines)
     with open("test/test_data/article.pdf", "rb") as f:
         pdf_bytes = f.read()
-    vlp_mock = {"data": [{"page_idx": 1, "image_boxes": [{"label": "table", "x1": 0.1, "y1": 0.3, "x2": 0.4, "y2": 0.6}]}]}
-    output = list(pl(pdf_bytes, vlp_mock))
+    vlp_mock = {
+        "data": [
+            {
+                "page_idx": 1,
+                "boxes": [
+                    {"uuid": "marius-marius-gib-mir-meine-legionen-wieder", "label": "table", "box": {"x1": 100, "y1": 100, "x2": 200, "y2": 200}}
+                ],
+            }
+        ]
+    }
+    data = {"pdf": pdf_bytes, "vlp_output": vlp_mock}
+    output = list(pl(data))
+    # Fail with a clear message instead of an IndexError if nothing was produced.
+    assert output, "pipeline returned no results for the mocked table box"
     lines = output[0]["tableLines"]
     assert len(lines) > 1
-    assert all(map(lambda item: sorted(item.keys())==['x1', 'x2', 'y1', 'y2'], lines))
\ No newline at end of file
+    assert all(sorted(item.keys()) == ["x1", "x2", "y1", "y2"] for item in lines)