fixed json format and refactored service functions
This commit is contained in:
parent
88bb8dbddf
commit
9327fb7231
@ -7,6 +7,7 @@ WORKDIR /app/service
|
|||||||
|
|
||||||
COPY ./src ./src
|
COPY ./src ./src
|
||||||
COPY cv_analysis ./cv_analysis
|
COPY cv_analysis ./cv_analysis
|
||||||
|
COPY config.yaml ./config.yaml
|
||||||
|
|
||||||
RUN python3 -m pip install --upgrade pip
|
RUN python3 -m pip install --upgrade pip
|
||||||
RUN python3 -m pip install -e .
|
RUN python3 -m pip install -e .
|
||||||
|
|||||||
@ -23,5 +23,5 @@ deskew:
|
|||||||
test_dummy: test_dummy
|
test_dummy: test_dummy
|
||||||
|
|
||||||
visual_logging:
|
visual_logging:
|
||||||
level: $LOGGING_LEVEL_ROOT|DEBUG
|
level: $LOGGING_LEVEL_ROOT|INFO
|
||||||
output_folder: /tmp/debug/
|
output_folder: /tmp/debug/
|
||||||
@ -49,7 +49,7 @@ def main(args):
|
|||||||
elif operation == "layout-parsing":
|
elif operation == "layout-parsing":
|
||||||
response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb"))
|
response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb"))
|
||||||
else:
|
else:
|
||||||
raise ValueError("{args.operation} is not a valid value.")
|
raise ValueError(f"{args.operation} is not a valid value.")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
predictions = response.json()
|
predictions = response.json()
|
||||||
|
|
||||||
|
|||||||
@ -8,11 +8,10 @@ from prometheus_flask_exporter import PrometheusMetrics
|
|||||||
from waitress import serve
|
from waitress import serve
|
||||||
|
|
||||||
from cv_analysis.utils import npconvert
|
from cv_analysis.utils import npconvert
|
||||||
from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO
|
from cv_analysis.table_parsing import parse_table
|
||||||
from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf
|
from cv_analysis.redaction_detection import find_redactions
|
||||||
from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf
|
from cv_analysis.layout_parsing import parse_layout
|
||||||
from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO
|
from cv_analysis.figure_detection import detect_figures
|
||||||
from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
|
|
||||||
from cv_analysis.utils.logging import logger
|
from cv_analysis.utils.logging import logger
|
||||||
from cv_analysis.utils.preprocessing import open_pdf
|
from cv_analysis.utils.preprocessing import open_pdf
|
||||||
from cv_analysis.config import CONFIG
|
from cv_analysis.config import CONFIG
|
||||||
@ -44,7 +43,7 @@ def main():
|
|||||||
@metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
|
@metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
|
||||||
def get_tables():
|
def get_tables():
|
||||||
start_monitoring()
|
start_monitoring()
|
||||||
tables = annotate("tables")
|
tables = annotate(parse_table)
|
||||||
# page_counter.inc(npages)
|
# page_counter.inc(npages)
|
||||||
return tables
|
return tables
|
||||||
|
|
||||||
@ -52,7 +51,7 @@ def main():
|
|||||||
@metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
|
@metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
|
||||||
def get_redactions():
|
def get_redactions():
|
||||||
start_monitoring()
|
start_monitoring()
|
||||||
redactions = annotate("redactions")
|
redactions = annotate(find_redactions)
|
||||||
# page_counter.inc(npages)
|
# page_counter.inc(npages)
|
||||||
return redactions
|
return redactions
|
||||||
|
|
||||||
@ -60,7 +59,7 @@ def main():
|
|||||||
@metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
|
@metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
|
||||||
def get_figures():
|
def get_figures():
|
||||||
start_monitoring()
|
start_monitoring()
|
||||||
figures = annotate("figures")
|
figures = annotate(detect_figures)
|
||||||
# page_counter.inc(npages)
|
# page_counter.inc(npages)
|
||||||
return figures
|
return figures
|
||||||
|
|
||||||
@ -68,7 +67,7 @@ def main():
|
|||||||
@metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
|
@metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
|
||||||
def get_layout():
|
def get_layout():
|
||||||
start_monitoring()
|
start_monitoring()
|
||||||
layout = annotate("layout")
|
layout = annotate(parse_layout)
|
||||||
# page_counter.inc(npages)
|
# page_counter.inc(npages)
|
||||||
return layout
|
return layout
|
||||||
|
|
||||||
@ -77,7 +76,6 @@ def main():
|
|||||||
response = "OK"
|
response = "OK"
|
||||||
return jsonify(response)
|
return jsonify(response)
|
||||||
|
|
||||||
# predictor = initialize_predictor()
|
|
||||||
logger.info("<3 Annotator ready.")
|
logger.info("<3 Annotator ready.")
|
||||||
|
|
||||||
mode = CONFIG.webserver.mode
|
mode = CONFIG.webserver.mode
|
||||||
@ -89,46 +87,40 @@ def main():
|
|||||||
tracemalloc.stop()
|
tracemalloc.stop()
|
||||||
|
|
||||||
|
|
||||||
def apply_annotation_function(annotation_function, page_list):
|
|
||||||
outdict = {}
|
|
||||||
for i, page in enumerate(page_list):
|
|
||||||
results = annotation_function(page)
|
|
||||||
if results:
|
|
||||||
outdict.update({i: results})
|
|
||||||
return outdict
|
|
||||||
|
|
||||||
|
def make_annotations(pdf, annotation_function):
|
||||||
def make_annotations(pdf, task):
|
results = []
|
||||||
if task == "tables":
|
for i, page in enumerate(pdf):
|
||||||
annotation = {"tables": apply_annotation_function(parse_table, pdf)}
|
boxes = annotation_function(page)
|
||||||
elif task == "redactions":
|
cells= []
|
||||||
annotation = {"redactions": apply_annotation_function(find_redactions, pdf)}
|
if boxes:
|
||||||
elif task == "figures":
|
cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
|
||||||
annotation = {"figures": apply_annotation_function(detect_figures, pdf)}
|
results.append({
|
||||||
elif task == "layout":
|
"page": i,
|
||||||
annotation = {"layout": apply_annotation_function(parse_layout, pdf)}
|
"pageWidth": page.shape[1],
|
||||||
else:
|
"pageHeight": page.shape[0],
|
||||||
raise ValueError(
|
"cells": cells
|
||||||
f"'{task}' is not a valid operation keyword. Valid values include: \
|
})
|
||||||
\ntables\nredactions\nfigures\nlayout\n"
|
logger.info(str(results))
|
||||||
)
|
logger.info(type(results))
|
||||||
|
output_dict = {"pages": results}
|
||||||
return json.dumps(annotation, default=npconvert)
|
return jsonify(json.dumps(output_dict, default=npconvert))
|
||||||
|
|
||||||
|
|
||||||
def get_size(data):
|
def get_size(data):
|
||||||
return round(getsizeof(data) / 1000000, 2)
|
return round(getsizeof(data) / 1000000, 2)
|
||||||
|
|
||||||
|
|
||||||
def annotate(task):
|
def annotate(annotation_function):
|
||||||
def inner():
|
def inner():
|
||||||
data = request.data
|
data = request.data
|
||||||
logger.info(f"Received data.")
|
logger.info(f"Received data.")
|
||||||
logger.info(f"Processing data.")
|
logger.info(f"Processing data.")
|
||||||
pdf, angles = open_pdf(data)
|
pdf, angles = open_pdf(data)
|
||||||
# npages = len(pdf)
|
annotations = make_annotations(pdf, annotation_function)
|
||||||
annotations = make_annotations(pdf, task)
|
#if CONFIG.deskew.function != "identity":
|
||||||
return jsonify({"result": annotations, "deskew_angles": angles})
|
# annotations.update({"deskew_angles": angles})
|
||||||
|
return annotations
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return inner()
|
return inner()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user