fix(pdf conversion): repair broken bad x-ref handling
This commit is contained in:
parent
dadc0a4163
commit
6163e29d6b
@ -92,12 +92,12 @@ def get_images_on_page(doc, metadata):
|
||||
|
||||
|
||||
def extract_valid_metadata(doc: fitz.fitz.Document, page: fitz.fitz.Page):
|
||||
return compose(
|
||||
list,
|
||||
partial(add_alpha_channel_info, doc),
|
||||
filter_valid_metadata,
|
||||
get_metadata_for_images_on_page,
|
||||
)(page)
|
||||
metadata = get_metadata_for_images_on_page(page)
|
||||
metadata = filter_valid_metadata(metadata)
|
||||
metadata = add_alpha_channel_info(doc, metadata)
|
||||
|
||||
return list(metadata)
|
||||
|
||||
|
||||
|
||||
def get_metadata_for_images_on_page(page: fitz.Page):
|
||||
@ -207,7 +207,11 @@ def add_alpha_channel_info(doc, metadata):
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def load_image_handle_from_xref(doc, xref):
|
||||
return doc.extract_image(xref)
|
||||
try:
|
||||
return doc.extract_image(xref)
|
||||
except ValueError:
|
||||
logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
|
||||
return
|
||||
|
||||
|
||||
rounder = rcompose(round, int)
|
||||
|
||||
@ -1,10 +1,3 @@
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
from image_prediction.utils import get_logger
|
||||
|
||||
|
||||
pytest_plugins = [
|
||||
"test.fixtures.extractor",
|
||||
"test.fixtures.image",
|
||||
@ -17,14 +10,5 @@ pytest_plugins = [
|
||||
"test.fixtures.parameters",
|
||||
"test.fixtures.pdf",
|
||||
"test.fixtures.target",
|
||||
"test.unit_tests.image_stitching_test"
|
||||
"test.unit_tests.image_stitching_test",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def mute_logger():
|
||||
logger = get_logger()
|
||||
level = logger.level
|
||||
logger.setLevel(logging.CRITICAL + 1)
|
||||
yield
|
||||
logger.setLevel(level)
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from image_prediction.config import Config
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def config_file_content():
|
||||
return {"A": [{"B": [1, 2]}, {"C": 3}, 4], "D": {"E": {"F": True}}}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def config(config_file_content):
|
||||
with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w") as f:
|
||||
yaml.dump(config_file_content, f, default_flow_style=False)
|
||||
yield Config(f.name)
|
||||
|
||||
|
||||
def test_dot_access_key_exists(config):
|
||||
assert config.A == [{"B": [1, 2]}, {"C": 3}, 4]
|
||||
assert config.D.E["F"]
|
||||
|
||||
|
||||
def test_access_key_exists(config):
|
||||
assert config["A"] == [{"B": [1, 2]}, {"C": 3}, 4]
|
||||
assert config["A"][0] == {"B": [1, 2]}
|
||||
assert config["A"][0]["B"] == [1, 2]
|
||||
assert config["A"][0]["B"][0] == 1
|
||||
|
||||
|
||||
def test_dot_access_key_does_not_exists(config):
|
||||
assert config.B is None
|
||||
|
||||
|
||||
def test_access_key_does_not_exists(config):
|
||||
assert config["B"] is None
|
||||
|
||||
|
||||
def test_get_method_returns_key_if_key_does_exist(config):
|
||||
dot_indexable = config.D.E
|
||||
assert dot_indexable.get("F", "default_value") is True
|
||||
|
||||
|
||||
def test_get_method_returns_default_if_key_does_not_exist(config):
|
||||
dot_indexable = config.D.E
|
||||
assert dot_indexable.get("X", "default_value") == "default_value"
|
||||
@ -1,48 +0,0 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from image_prediction.exceptions import IntentionalTestException
|
||||
from image_prediction.flask import make_prediction_server
|
||||
|
||||
|
||||
def predict_fn(x: bytes):
|
||||
x = int(x.decode())
|
||||
if x == 42:
|
||||
return True
|
||||
else:
|
||||
raise IntentionalTestException("This is intended.")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def server():
|
||||
server = make_prediction_server(predict_fn)
|
||||
server.config.update({"TESTING": True})
|
||||
return server
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def client(server):
|
||||
return server.test_client()
|
||||
|
||||
|
||||
def test_server_predict_success(client, mute_logger):
|
||||
response = client.post("/predict", data="42")
|
||||
assert json.loads(response.data)
|
||||
|
||||
|
||||
def test_server_predict_failure(client, mute_logger):
|
||||
response = client.post("/predict", data="13")
|
||||
assert response.status_code == 500
|
||||
|
||||
|
||||
def test_server_health_check(client):
|
||||
response = client.get("/health")
|
||||
assert response.status_code == 200
|
||||
assert response.json == "OK"
|
||||
|
||||
|
||||
def test_server_ready_check(client):
|
||||
response = client.get("/ready")
|
||||
assert response.status_code == 200
|
||||
assert response.json == "OK"
|
||||
Loading…
x
Reference in New Issue
Block a user