hausarbeit: Aktualisierung der Liesmich und Anpassung einer Pruefung
This commit is contained in:
parent
20f8dcd336
commit
aefb73bf28
54
README.md
54
README.md
@ -3,6 +3,57 @@
|
||||
This repository implements computer-vision-based approaches for detecting and parsing visual features such as tables or
|
||||
previous redactions in documents.
|
||||
|
||||
## API
|
||||
|
||||
Input message:
|
||||
|
||||
```json
|
||||
{
|
||||
"targetFilePath": {
|
||||
"pdf": "absolute file path",
|
||||
"vlp_output": "absolute file path"
|
||||
},
|
||||
"responseFilePath": "absolute file path",
|
||||
"operation": "table_image_inference"
|
||||
}
|
||||
```
|
||||
|
||||
The response is uploaded to storage at the path specified in the `responseFilePath` field. Its structure is as follows:
|
||||
|
||||
```json
|
||||
{
|
||||
...,
|
||||
"data": [
|
||||
{
|
||||
"pageNum": 0,
|
||||
"bbox": {
|
||||
"x1": 55.3407,
|
||||
"y1": 247.0246,
|
||||
"x2": 558.5602,
|
||||
"y2": 598.0585
|
||||
},
|
||||
"uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
|
||||
"label": "table",
|
||||
"tableLines": [
|
||||
{
|
||||
"x1": 0,
|
||||
"y1": 16,
|
||||
"x2": 1399,
|
||||
"y2": 16
|
||||
},
|
||||
...
|
||||
],
|
||||
"imageInfo": {
|
||||
"height": 693,
|
||||
"width": 1414
|
||||
}
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
@ -31,10 +82,9 @@ The below snippet shows how to find the outlines of previous redactions.
|
||||
|
||||
```python
|
||||
from cv_analysis.redaction_detection import find_redactions
|
||||
import pdf2image
|
||||
import pdf2image
|
||||
import numpy as np
|
||||
|
||||
|
||||
pdf_path = ...
|
||||
page_index = ...
|
||||
|
||||
|
||||
@ -1,12 +1,23 @@
|
||||
from cv_analysis.server.pipeline import make_image_analysis_pipeline
|
||||
from cv_analysis.table_inference import infer_lines
|
||||
|
||||
def test_table_inference():
|
||||
|
||||
def test_table_inference_smoke():
|
||||
pl = make_image_analysis_pipeline(infer_lines)
|
||||
with open("test/test_data/article.pdf", "rb") as f:
|
||||
pdf_bytes = f.read()
|
||||
vlp_mock = {"data": [{"page_idx": 1, "image_boxes": [{"label": "table", "x1": 0.1, "y1": 0.3, "x2": 0.4, "y2": 0.6}]}]}
|
||||
output = list(pl(pdf_bytes, vlp_mock))
|
||||
vlp_mock = {
|
||||
"data": [
|
||||
{
|
||||
"page_idx": 1,
|
||||
"boxes": [
|
||||
{"uuid": "marius-marius-gib-mir-meine-legionen-wieder", "label": "table", "box": {"x1": 100, "y1": 100, "x2": 200, "y2": 200}}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
data = {"pdf": pdf_bytes, "vlp_output": vlp_mock}
|
||||
output = list(pl(data))
|
||||
lines = output[0]["tableLines"]
|
||||
assert len(lines) > 1
|
||||
assert all(map(lambda item: sorted(item.keys())==['x1', 'x2', 'y1', 'y2'], lines))
|
||||
assert all(map(lambda item: sorted(item.keys()) == ["x1", "x2", "y1", "y2"], lines))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user