move and fix layout tests from redaction-service

This commit is contained in:
Kilian Schuettler 2023-07-24 19:43:25 +02:00
parent 47fd8e05d1
commit 143ebee25e
98 changed files with 56741 additions and 41 deletions

View File

@ -12,9 +12,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DocumentData {
DocumentPage[] pages;
DocumentText[] atomicTextBlocks;
DocumentPositions[] atomicPositionBlocks;
DocumentPage[] documentPages;
DocumentText[] documentTexts;
DocumentPositions[] documentPositions;
DocumentStructure documentStructure;

View File

@ -72,9 +72,9 @@ public class LayoutParsingStorageService {
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) {
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentStructure());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getAtomicTextBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getAtomicPositionBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getPages());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getDocumentTexts());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getDocumentPositions());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getDocumentPages());
}
@ -103,9 +103,9 @@ public class LayoutParsingStorageService {
return DocumentData.builder()
.documentStructure(tableOfContentsData)
.atomicPositionBlocks(atomicPositionBlockData)
.atomicTextBlocks(documentTextBlockData)
.pages(documentPageData)
.documentPositions(atomicPositionBlockData)
.documentTexts(documentTextBlockData)
.documentPages(documentPageData)
.build();
}

View File

@ -41,9 +41,9 @@ public class DocumentDataMapper {
List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
return DocumentData.builder()
.atomicTextBlocks(documentTextBlockData.toArray(new DocumentText[0]))
.atomicPositionBlocks(atomicPositionBlockData.toArray(new DocumentPositions[0]))
.pages(documentPageData.toArray(new DocumentPage[0]))
.documentTexts(documentTextBlockData.toArray(new DocumentText[0]))
.documentPositions(atomicPositionBlockData.toArray(new DocumentPositions[0]))
.documentPages(documentPageData.toArray(new DocumentPage[0]))
.documentStructure(tableOfContentsData)
.build();
}

View File

@ -39,13 +39,13 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
context.pages.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
document.setDocumentTree(context.documentTree);
document.setPages(new HashSet<>(context.pages));
document.setNumberOfPages(documentData.getPages().length);
document.setNumberOfPages(documentData.getDocumentPages().length);
document.setTextBlock(document.getTextBlock());
return document;
@ -188,8 +188,8 @@ public class DocumentGraphMapper {
this.documentTree = documentTree;
this.pages = new LinkedList<>();
this.documentTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
this.atomicPositionBlockData = Arrays.stream(documentData.getAtomicPositionBlocks()).toList();
this.documentTextBlockData = Arrays.stream(documentData.getDocumentTexts()).toList();
this.atomicPositionBlockData = Arrays.stream(documentData.getDocumentPositions()).toList();
}

View File

@ -0,0 +1,157 @@
package com.knecon.fforesight.service.layoutparser.server;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.pdfbox.Loader;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.SemanticNode;
import com.knecon.fforesight.tenantcommons.TenantsClient;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
import lombok.ToString;
/**
 * Integration test that runs the layout parsing pipeline on a set of PDFs and compares the detected
 * headlines with the headlines recorded in hand-curated redaction logs (the "gold standard").
 * Precision and recall are averaged over all files and must not drop below fixed thresholds.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(HeadlinesGoldStandardIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class HeadlinesGoldStandardIntegrationTest {

    @MockBean
    private TenantsClient tenantsClient;

    @MockBean
    private RabbitTemplate rabbitTemplate;

    @Autowired
    private ObjectMapper objectMapper;

    @Autowired
    private StorageService storageService;

    @Autowired
    private LayoutParsingPipeline layoutParsingPipeline;


    /**
     * Averages precision and recall over all gold standard files and checks them against the thresholds.
     */
    @Test
    public void testHeadlineDetection() {

        List<Metrics> metrics = new ArrayList<>();
        metrics.add(getMetrics("files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
                "files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json"));
        metrics.add(getMetrics("files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
                "files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json"));
        // NOTE(review): this PDF is loaded from "files/" while its redaction log lives in "files/headlineTest/" — confirm the path is intended.
        metrics.add(getMetrics("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));

        double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0);
        double recall = metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0);

        System.out.println("Precision is: " + precision + " recall is: " + recall);

        Assertions.assertThat(precision).isGreaterThanOrEqualTo(0.44f);
        Assertions.assertThat(recall).isGreaterThanOrEqualTo(0.69f);
    }


    /**
     * Computes precision and recall of the headline detection for a single document.
     *
     * @param filePath        classpath location of the PDF to parse
     * @param redactionLogUrl classpath location of the redaction log holding the expected headlines
     * @return precision (correct / found) and recall (correct / expected) for this document
     */
    @SneakyThrows
    private Metrics getMetrics(String filePath, String redactionLogUrl) {

        ClassPathResource redactionLogResource = new ClassPathResource(redactionLogUrl);
        ClassPathResource pdfFileResource = new ClassPathResource(filePath);

        var goldStandardLog = objectMapper.readValue(redactionLogResource.getInputStream(), RedactionLog.class);
        // Keep only entries that are redacted and whose most recent change (if any) is not a removal.
        goldStandardLog.getRedactionLogEntry().removeIf(entry -> {
            if (!entry.isRedacted()) {
                return true;
            }
            var changes = entry.getChanges();
            // An entry without any recorded change cannot have been removed.
            return changes != null && !changes.isEmpty() && ChangeType.REMOVED.equals(changes.get(changes.size() - 1).getType());
        });
        Set<Headline> goldStandardHeadlines = goldStandardLog.getRedactionLogEntry()
                .stream()
                .map(entry -> new Headline(entry.getPositions().get(0).getPage(), entry.getValue()))
                .collect(Collectors.toSet());
        // Fail with a clear message instead of silently producing a NaN recall.
        Assertions.assertThat(goldStandardHeadlines).as("gold standard headlines in %s", redactionLogUrl).isNotEmpty();

        List<Headline> foundHeadlines;
        // The PDF document and its stream are released as soon as the headlines have been extracted.
        try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
            Document documentGraph = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, pdDocument, new ImageServiceResponse(), new TableServiceResponse());
            foundHeadlines = documentGraph.streamAllSubNodes()
                    .map(SemanticNode::getHeadline)
                    .distinct()
                    .map(headlineNode -> new Headline(headlineNode.getPages().stream().findFirst().orElseThrow().getNumber(),
                            headlineNode.getTextBlock().getSearchText().stripTrailing()))
                    .toList();
        }
        // Fail with a clear message instead of silently producing a NaN precision.
        Assertions.assertThat(foundHeadlines).as("headlines detected in %s", filePath).isNotEmpty();

        Set<Headline> correct = foundHeadlines.stream().filter(goldStandardHeadlines::contains).collect(Collectors.toSet());

        float precision = (float) correct.size() / (float) foundHeadlines.size();
        // Every correct headline is part of the gold standard, so correct + missing equals the gold standard size.
        float recall = (float) correct.size() / (float) goldStandardHeadlines.size();

        return new Metrics(precision, recall);
    }


    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
    public static class RedactionIntegrationTestConfiguration {

    }

    /** Precision and recall measured for one document. */
    @Data
    @EqualsAndHashCode
    @AllArgsConstructor
    @ToString
    private static class Metrics {

        private float precision;
        private float recall;

    }

    /** A headline identified by its page number and text; equality is value based so it can be used in sets. */
    @Data
    @EqualsAndHashCode
    @AllArgsConstructor
    @ToString
    private static class Headline {

        private int page;
        private String headline;

    }

}

View File

@ -30,9 +30,9 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
ObjectMapper mapper = ObjectMapperFactory.create();
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_structure", ".json")), documentData.getDocumentStructure());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_text", ".json")), documentData.getAtomicTextBlocks());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_positions", ".json")), documentData.getAtomicPositionBlocks());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_pages", ".json")), documentData.getPages());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_text", ".json")), documentData.getDocumentTexts());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_positions", ".json")), documentData.getDocumentPositions());
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_pages", ".json")), documentData.getDocumentPages());
}
}

View File

@ -1,14 +1,22 @@
package com.knecon.fforesight.service.layoutparser.server.graph;
import org.junit.jupiter.api.Disabled;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentGraphMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.taas.TaasDocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.PropertiesMapper;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ -16,22 +24,64 @@ public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
@Test
@SneakyThrows
@Disabled
public void testGraphMapping() {
Document document = buildGraph("files/crafted document");
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
String filename = "files/new/crafted document";
Document document = buildGraph(filename);
DocumentData documentData = DocumentDataMapper.toDocumentData(document);
var researchDocumentData = TaasDocumentDataMapper.fromDocument(document);
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_PAGES" + ".json", documentData.getDocumentPages());
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_TEXT" + ".json", documentData.getDocumentTexts());
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_POSITIONS" + ".json", documentData.getDocumentPositions());
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_STRUCTURE" + ".json", documentData.getDocumentStructure());
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
Document newDocumentGraph = DocumentGraphMapper.toDocumentGraph(documentData2);
DocumentPage[] pageData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_PAGES" + ".json", DocumentPage[].class);
DocumentText[] atomicTextBlockData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_TEXT" + ".json", DocumentText[].class);
DocumentPositions[] atomicPositionBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
filename + "_POSITIONS" + ".json",
DocumentPositions[].class);
DocumentStructure documentTreeData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_STRUCTURE" + ".json", DocumentStructure.class);
assert document.toString().equals(newDocumentGraph.toString());
assert document.getDocumentTree().toString().equals(newDocumentGraph.getDocumentTree().toString());
DocumentData documentData2 = DocumentData.builder()
.documentPages(pageData)
.documentStructure(documentTreeData)
.documentTexts(atomicTextBlockData)
.documentPositions(atomicPositionBlockData)
.build();
Document newDocument = DocumentGraphMapper.toDocumentGraph(documentData2);
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(document));
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(documentData));
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(documentData2));
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(newDocument));
assertEquals(document.toString(), newDocument.toString());
assertEquals(document.getDocumentTree().toString(), newDocument.getDocumentTree().toString());
}
/**
 * Checks that every {@link Table} node reachable from the document reports more than zero rows and columns.
 *
 * @param document the document graph whose sub nodes are inspected
 * @return {@code true} if no table node has a non-positive row or column count
 */
private static boolean allTablesHavePositiveNumberOfRowsAndColumns(Document document) {

    return document.streamAllSubNodes()
            .filter(Table.class::isInstance)
            .map(Table.class::cast)
            .allMatch(table -> table.getNumberOfCols() > 0 && table.getNumberOfRows() > 0);
}
/**
 * Checks that every TABLE entry of the serialized document structure yields a table with more than zero
 * rows and columns once its properties are parsed back through {@link PropertiesMapper}.
 *
 * @param documentData the serialized document whose structure entries are inspected
 * @return {@code true} if no parsed table has a non-positive row or column count
 */
private static boolean allTablesHavePositiveNumberOfRowsAndColumns(DocumentData documentData) {

    var tableProperties = documentData.getDocumentStructure()
            .streamAllEntries()
            .filter(entry -> entry.getType().equals(NodeType.TABLE))
            .map(DocumentStructure.EntryData::getProperties);

    return tableProperties.map(properties -> {
        // Rebuild a Table from the stored properties so the counts are read the same way as on the graph.
        var tableBuilder = Table.builder();
        PropertiesMapper.parseTableProperties(properties, tableBuilder);
        return tableBuilder.build();
    }).allMatch(table -> table.getNumberOfCols() > 0 && table.getNumberOfRows() > 0);
}
}

View File

@ -0,0 +1,60 @@
package com.knecon.fforesight.service.layoutparser.server.model;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.util.Matrix;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.storage.commons.properties.StorageProperties;
import com.iqser.red.storage.commons.service.ObjectSerializer;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import lombok.SneakyThrows;
/**
 * Verifies that a {@link TextPositionSequence} can be deserialized from JSON through the storage
 * {@link ObjectSerializer}.
 */
public class TextPositionSequenceTest {

    private static final String TEXT_POSITION_SEQUENCE_AS_JSON = "{\n" //
            + " \"page\": 1,\n" //
            + " \"textPositions\": [],\n" //
            + " \"dir\": 180.0,\n" //
            + " \"rotation\": 0,\n" //
            + " \"pageHeight\": 800,\n" //
            + " \"pageWidth\": 600\n" //
            + "}";

    private final ObjectSerializer objectSerializer = new ObjectSerializer(new ObjectMapper(), new StorageProperties());


    /**
     * Deserializes the sample JSON and checks every property of the resulting sequence.
     */
    @Test
    @SneakyThrows
    public void testDeserializationWithJackson() {

        byte[] json = TEXT_POSITION_SEQUENCE_AS_JSON.getBytes(StandardCharsets.UTF_8);
        TextPositionSequence textPositionSequence = objectSerializer.deserializeWithJackson(new ByteArrayInputStream(json), TextPositionSequence.class);

        assertPropertiesAfterJsonDeserialization(textPositionSequence);
    }


    /**
     * Asserts that the sequence carries exactly the values of the sample JSON; the numeric direction
     * 180.0 is expected to map to {@link TextDirection#HALF_CIRCLE}.
     */
    private void assertPropertiesAfterJsonDeserialization(TextPositionSequence textPositionSequence) {

        assertThat(textPositionSequence.getPage()).isEqualTo(1);
        assertThat(textPositionSequence.getTextPositions()).isEmpty();
        assertThat(textPositionSequence.getDir()).isEqualTo(TextDirection.HALF_CIRCLE);
        assertThat(textPositionSequence.getRotation()).isEqualTo(0);
        assertThat(textPositionSequence.getPageHeight()).isEqualTo(800f);
        assertThat(textPositionSequence.getPageWidth()).isEqualTo(600f);
    }

}

View File

@ -0,0 +1,568 @@
package com.knecon.fforesight.service.layoutparser.server.segmentation;
import static org.assertj.core.api.Assertions.assertThat;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.adapter.ImageServiceResponseAdapter;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.ImageType;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
import com.knecon.fforesight.service.layoutparser.processor.services.PdfParsingService;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
import com.knecon.fforesight.service.layoutparser.server.Application;
import com.knecon.fforesight.service.layoutparser.server.utils.FileSystemBackedStorageService;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(PdfSegmentationServiceTest.TestConfiguration.class)
public class PdfSegmentationServiceTest {
@Autowired
private PdfParsingService pdfParsingService;
@Autowired
private ObjectMapper objectMapper;
@MockBean
private RabbitTemplate rabbitTemplate;
@Autowired
private RedactManagerClassificationService redactManagerClassificationService;
@Autowired
private CvTableParsingAdapter cvTableParsingAdapter;
@Autowired
private ImageServiceResponseAdapter imageServiceResponseAdapter;
@Autowired
private SectionsBuilderService sectionsBuilderService;
/**
 * Test context configuration: RabbitMQ auto configuration is excluded, the storage auto configuration
 * is filtered out of the component scan, and a {@link FileSystemBackedStorageService} is registered
 * as the primary {@link StorageService}.
 */
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
public static class TestConfiguration {

    /**
     * @return the storage service used by the tests instead of the auto-configured one
     */
    @Bean
    @Primary
    public StorageService inmemoryStorage() {

        StorageService fileSystemBackedStorage = new FileSystemBackedStorageService();
        return fileSystemBackedStorage;
    }

}
/**
 * Parses the given PDF with empty table and image service responses, then classifies the result
 * and builds its sections.
 *
 * @param originDocument the loaded PDF; it is not closed by this method
 * @return the parsed, classified document with its sections built
 */
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {

    var tablesPerPage = cvTableParsingAdapter.buildCvParsedTablesPerPage(new TableServiceResponse());
    var imagesPerPage = imageServiceResponseAdapter.buildClassifiedImagesPerPage(new ImageServiceResponse());

    ClassificationDocument parsedDocument = pdfParsingService.parseDocument(LayoutParsingType.REDACT_MANAGER, originDocument, tablesPerPage, imagesPerPage);

    redactManagerClassificationService.classifyDocument(parsedDocument);
    sectionsBuilderService.buildSections(parsedDocument);

    return parsedDocument;
}
/**
 * Reads a stored image service response and groups its entries per page as {@link ClassifiedImage}s.
 * NOTE(review): the grouped result is never asserted on; the test only fails if reading or mapping throws.
 */
@Test
@SneakyThrows
public void testMapping() {

    ClassPathResource imageResponseResource = new ClassPathResource("files/image_response.json");
    ImageServiceResponse imageServiceResponse = objectMapper.readValue(imageResponseResource.getInputStream(), ImageServiceResponse.class);

    Map<Integer, List<ClassifiedImage>> imagesPerPage = new HashMap<>();
    imageServiceResponse.getData().forEach(imageMetadata -> {
        List<ClassifiedImage> imagesOnPage = imagesPerPage.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), pageNumber -> new ArrayList<>());

        Rectangle2D.Double bounds = new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
                imageMetadata.getPosition().getY1(),
                imageMetadata.getGeometry().getWidth(),
                imageMetadata.getGeometry().getHeight());
        ImageType imageType = ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT));

        imagesOnPage.add(new ClassifiedImage(bounds, imageType, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber()));
    });

    System.out.println("object");
}
/**
 * The "Spanning Cells" example must yield a first table of 6 columns and 13 rows in which every row
 * holds one cell per column.
 */
@Test
public void testPDFSegmentationWithComplexTable() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");

    // Close the PDF and its stream once all assertions are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        List<TablePageBlock> tables = document.getSections().stream().flatMap(section -> section.getTables().stream()).collect(Collectors.toList());
        assertThat(tables).isNotEmpty();

        TablePageBlock table = tables.get(0);
        assertThat(table.getColCount()).isEqualTo(6);
        assertThat(table.getRowCount()).isEqualTo(13);
        assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
    }
}
/**
 * The "Merge Table" example must yield a single-row 8 column header table followed by a 2 row table
 * whose cells all reference the cells of that header row as their header cells.
 */
@Test
public void testTableExtraction() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");

    // Close the PDF and its stream once all assertions are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        List<TablePageBlock> tables = document.getSections().stream().flatMap(section -> section.getTables().stream()).collect(Collectors.toList());
        assertThat(tables).isNotEmpty();
        // Two tables are accessed below; fail with an assertion instead of an index error if one is missing.
        assertThat(tables.size()).isGreaterThanOrEqualTo(2);

        TablePageBlock firstTable = tables.get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);

        TablePageBlock secondTable = tables.get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(2);

        // Each cell of the first table's only row is expected to be the sole header cell of its column.
        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }
}
/**
 * The "Merge Multi Page Table" example must yield a 9x5 table followed by a 9x6 table whose cells
 * carry the same header cells as the last row of the first table.
 */
@Test
public void testMultiPageMetadataPropagation() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");

    // Close the PDF and its stream once all assertions are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        List<TablePageBlock> tables = document.getSections().stream().flatMap(section -> section.getTables().stream()).collect(Collectors.toList());
        assertThat(tables).isNotEmpty();
        // Two tables are accessed below; fail with an assertion instead of an index error if one is missing.
        assertThat(tables.size()).isGreaterThanOrEqualTo(2);

        TablePageBlock firstTable = tables.get(0);
        assertThat(firstTable.getColCount()).isEqualTo(9);
        assertThat(firstTable.getRowCount()).isEqualTo(5);

        TablePageBlock secondTable = tables.get(1);
        assertThat(secondTable.getColCount()).isEqualTo(9);
        assertThat(secondTable.getRowCount()).isEqualTo(6);

        // The header cells of the first table's last row must be propagated to every row of the second table.
        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }
}
/**
 * The "Rotated Table Headers" example must yield a single-row 8 column header table followed by a
 * 6 row table whose cells all reference the cells of that header row as their header cells.
 */
@Test
public void testHeaderCellsForRotatedTable() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");

    // Close the PDF and its stream once all assertions are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        List<TablePageBlock> tables = document.getSections().stream().flatMap(section -> section.getTables().stream()).collect(Collectors.toList());
        assertThat(tables).isNotEmpty();
        // Two tables are accessed below; fail with an assertion instead of an index error if one is missing.
        assertThat(tables.size()).isGreaterThanOrEqualTo(2);

        TablePageBlock firstTable = tables.get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);

        TablePageBlock secondTable = tables.get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(6);

        // Each cell of the first table's only row is expected to be the sole header cell of its column.
        List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
        assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
    }
}
/**
 * Validates the four tables expected on page 170 of document 56.
 */
@Test
public void testDoc56Page170() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");

    // Close the PDF and its stream once all validations are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        validateTableSize(document, 4);
        validateTable(document, 0, 1, 1, 0, 0);
        validateTable(document, 1, 2, 2, 0, 0);
        validateTable(document, 2, 7, 20, 0, 140);
        validateTable(document, 3, 8, 31, 0, 170);
    }
}
/**
 * Validates the single table expected on page 1 of VV-931175, including the text of its first three rows.
 */
@Test
public void testVV931175Page1() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf");

    // Close the PDF and its stream once all validations are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        validateTableSize(document, 1);
        validateTable(document, 0, 8, 8, 0, 2);

        List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
                        "Author, date",
                        "Study title",
                        "Analytical method Author, date, No.",
                        "Technique, LOQ of the method, validated working range",
                        "Method meets analytical validation criteria",
                        "Remarks (in case validation criteria are not met)",
                        "Acceptability of the method"),
                Arrays.asList("",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
                        "Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
                Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
                        "Evans P.G. 2001 TMJ4569B, VV-323245",
                        "Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
                        "Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845 in a Trial Carried",
                        "LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
                        "Y",
                        "N/A",
                        "Y"));

        validateTable(document, 0, values);
    }
}
/**
 * Validates the four tables expected on page 6 of document 27.
 */
@Test
public void testDoc27Page6() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf");

    // Close the PDF and its stream once all validations are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        validateTableSize(document, 4);
        validateTable(document, 0, 3, 2, 0, 0);
        validateTable(document, 1, 3, 2, 0, 0);
        validateTable(document, 2, 3, 3, 0, 0);
        validateTable(document, 3, 3, 3, 0, 0);
    }
}
/**
 * Validates the two tables expected on page 185 of A20622A Part B9.
 */
@Test
public void testDocA20622APartB9Page185() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");

    // Close the PDF and its stream once all validations are done.
    try (var pdfStream = pdfFileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);

        validateTableSize(document, 2);
        validateTable(document, 0, 5, 5, 0, 23);
        validateTable(document, 1, 11, 9, 0, 36);
    }
}
/**
 * Parses page 123 of the "A20622A ... Part B7" fixture and checks that six tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDocA20622APartB7Page123() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 6);
        validateTable(document, 0, 2, 1, 0, 0);
        validateTable(document, 1, 2, 1, 0, 0);
        validateTable(document, 2, 2, 5, 0, 0);
        validateTable(document, 3, 2, 5, 0, 0);
        validateTable(document, 4, 2, 4, 0, 0);
        validateTable(document, 5, 2, 1, 0, 0);
    }
}
/**
 * Parses a single page of the "77 Pirimicarb" fixture and checks that three tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * NOTE(review): the method name says Page111 while the fixture file is "_Page11.pdf" - confirm which is intended.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc77Page111() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 3);
        validateTable(document, 0, 7, 9, 0, 0);
        validateTable(document, 1, 2, 1, 0, 0);
        validateTable(document, 2, 2, 10, 0, 0);
    }
}
/**
 * Parses page 532 of the "95 Trinexapac-ethyl" fixture and checks that exactly one table
 * with 9 columns, 9 rows and no empty cells is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc95Page532() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 9, 9, 0, 0);
    }
}
/**
 * Parses page 175 of the "52 Fludioxonil" fixture and checks that exactly one table
 * with 9 columns, 5 rows and 6 empty cells in total is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc52Page175() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 9, 5, 6, 0);
    }
}
/**
 * Parses page 174 of the "52 Fludioxonil" fixture and checks that exactly one table
 * with 9 columns, 6 rows and 7 empty cells in total is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc52Page174() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 9, 6, 7, 0);
    }
}
/**
 * Parses page 35 of the "19 Chlorothalonil" fixture and checks that exactly one table
 * with 10 columns, 6 rows and 1 empty cell in total is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc19Page35() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 10, 6, 0, 1);
    }
}
/**
 * Parses page 161 of the "19 Chlorothalonil" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc19Page161() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 2, 2, 0, 0);
        validateTable(document, 1, 1, 1, 0, 0);
    }
}
/**
 * Parses page 30 of the "47 Cyprodinil" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc47Page30() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource(
            "files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 7, 8, 1, 0);
        validateTable(document, 1, 7, 8, 1, 0);
    }
}
/**
 * Parses page 61 of the "49 Cyprodinil" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc49Page61() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource(
            "files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 4, 17, 0, 0);
        validateTable(document, 1, 7, 12, 0, 0);
    }
}
/**
 * Parses page 54 of the "81 Pirimicarb" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc81Page54() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 5, 14, 4, 0);
        validateTable(document, 1, 7, 12, 0, 0);
    }
}
/**
 * Parses page 134 of the "85 Pydiflumetofen" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * NOTE(review): the method name says Doc88 while the fixture file starts with "85" - confirm which is intended.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc88Page134() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 5, 17, 3, 0);
        validateTable(document, 1, 5, 16, 2, 0);
    }
}
/**
 * Parses page 18 of the "Thiabendazole DAR Addendum" fixture and checks that four tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDocThiabendazolePage18() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 4);
        validateTable(document, 0, 4, 4, 0, 0);
        validateTable(document, 1, 1, 1, 0, 0);
        validateTable(document, 2, 2, 3, 0, 0);
        validateTable(document, 3, 1, 1, 0, 0);
    }
}
/**
 * Parses page 18 of the "15 - Pretilachlor" fixture and checks that exactly one table
 * with 11 columns, 8 rows and no empty cells is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc15Page18() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 11, 8, 0, 0);
    }
}
/**
 * Parses page 23 of the "28 A8637C" fixture and checks that two tables are found,
 * each validated as (tableIndex, colCount, rowCount, emptyCellsCountCorrect, emptyCellsCountIncorrect).
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc28Page23() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource(
            "files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 2);
        validateTable(document, 0, 6, 8, 0, 2);
        validateTable(document, 1, 6, 8, 0, 1);
    }
}
/**
 * Parses page 17 of the "24 - SYN549522" fixture and checks that exactly one table
 * with 9 columns, 5 rows and 2 empty cells in total is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc24Page17() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 9, 5, 2, 0);
    }
}
/**
 * Parses page 5 of the "30 - Dicamba" fixture and checks that exactly one table
 * with 3 columns, 5 rows and no empty cells is found.
 *
 * @throws IOException if the PDF fixture cannot be opened or parsed
 */
@Test
public void testDoc30Page5() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf");
    // Release the classpath stream and the parsed PDF as soon as validation has finished.
    try (var pdfStream = pdfFileResource.getInputStream(); var pdDocument = Loader.loadPDF(pdfStream)) {
        ClassificationDocument document = buildClassificationDocument(pdDocument);
        validateTableSize(document, 1);
        validateTable(document, 0, 3, 5, 0, 0);
    }
}
/**
 * Asserts the shape of one detected table: its number of empty cells, its column count and its row count.
 * Tables are addressed by their position in the concatenation of every section's tables.
 *
 * @param document                 classification result to inspect
 * @param tableIndex               zero-based position of the table across all sections
 * @param colCount                 expected value of {@code getColCount()}
 * @param rowCount                 expected value of {@code getRowCount()}
 * @param emptyCellsCountCorrect   first part of the expected empty-cell total
 * @param emptyCellsCountIncorrect second part of the expected empty-cell total; only the sum of both is compared
 */
private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {

    TablePageBlock table = document.getSections().stream().flatMap(section -> section.getTables().stream()).toList().get(tableIndex);

    // A cell counts as empty when its string form is the empty string.
    int emptyCells = 0;
    for (List<Cell> row : table.getRows()) {
        for (Cell cell : row) {
            if (cell.toString().isEmpty()) {
                emptyCells++;
            }
        }
    }

    int expectedEmptyCells = emptyCellsCountCorrect + emptyCellsCountIncorrect;
    assertThat(emptyCells).isEqualTo(expectedEmptyCells);
    assertThat(table.getColCount()).isEqualTo(colCount);
    assertThat(table.getRowCount()).isEqualTo(rowCount);
}
/**
 * Asserts that the cells of one detected table, read row by row, start with the given texts.
 * Only as many cells as there are expected values are compared; additional cells are not checked.
 *
 * @param document   classification result to inspect
 * @param tableIndex zero-based position of the table across all sections
 * @param values     expected cell texts, grouped in lists that are flattened in order before comparison
 * @throws AssertionError if the table holds fewer cells than expected values or a cell text differs
 */
private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {

    TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
    List<Cell> rowsFlattened = table.getRows().stream().flatMap(List::stream).toList();
    List<String> valuesFlattened = values.stream().flatMap(List::stream).toList();

    // Report a too-small table as a readable test failure instead of an IndexOutOfBoundsException from the loop below.
    if (rowsFlattened.size() < valuesFlattened.size()) {
        throw new AssertionError("Table " + tableIndex + " has " + rowsFlattened.size() + " cells but " + valuesFlattened.size() + " expected values were given");
    }

    for (int i = 0; i < valuesFlattened.size(); i++) {
        Cell cell = rowsFlattened.get(i);
        String value = valuesFlattened.get(i);
        assertThat(cell.toString()).isEqualTo(value);
    }
}
/**
 * Asserts the total number of tables found across all sections of the document.
 *
 * @param document  classification result to inspect
 * @param tableSize expected number of tables
 */
private void validateTableSize(ClassificationDocument document, int tableSize) {

    int detectedTables = (int) document.getSections().stream().flatMap(section -> section.getTables().stream()).count();
    assertThat(detectedTables).isEqualTo(tableSize);
}
}

View File

@ -12,17 +12,15 @@ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
import com.knecon.fforesight.service.layoutparser.server.Application;
import com.knecon.fforesight.tenantcommons.TenantContext;
import com.knecon.fforesight.tenantcommons.TenantsClient;
@ -151,13 +149,6 @@ public class BaseTest {
@ComponentScan("com.knecon.fforesight.service.layoutparser")
public static class TestConfiguration {
@Bean
@Primary
public StorageService inMemoryStorage() {
return new FileSystemBackedStorageService();
}
}
}

View File

@ -0,0 +1,686 @@
{
"dossierId": "f889853e-4bf8-49a9-aae5-c38605c6ef40",
"fileId": "22ef63e29bb2a27db8497272336f6b32",
"data": [
{
"classification": {
"probabilities": {
"logo": 1.0,
"signature": 0.0,
"other": 0.0,
"formula": 0.0
},
"label": "logo"
},
"position": {
"x1": 89.88,
"x2": 274.20000000000005,
"y1": 716.24,
"y2": 770.0,
"pageNumber": 1
},
"geometry": {
"width": 184.32000000000005,
"height": 53.75999999999999
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.14298074612038092,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 3.42857142857143,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"signature": 0.999968,
"logo": 1.6e-05,
"other": 1.6e-05,
"formula": 0.0
},
"label": "signature"
},
"position": {
"x1": -0.10000600000000001,
"x2": 595.099994,
"y1": -0.07998660000000002,
"y2": 842.0800134,
"pageNumber": 3
},
"geometry": {
"width": 595.2,
"height": 842.16
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0000782051152328,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.706754060986036,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"probabilities": {
"signature": 0.999872,
"other": 7.9e-05,
"logo": 4.8e-05,
"formula": 0.0
},
"label": "signature"
},
"position": {
"x1": -0.10000600000000001,
"x2": 595.099994,
"y1": -0.07998660000000002,
"y2": 842.0800134,
"pageNumber": 7
},
"geometry": {
"width": 595.2,
"height": 842.16
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0000782051152328,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.706754060986036,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"probabilities": {
"signature": 0.996366,
"other": 0.003607,
"logo": 2.3e-05,
"formula": 4e-06
},
"label": "signature"
},
"position": {
"x1": -0.10000600000000001,
"x2": 595.099994,
"y1": -0.07998660000000002,
"y2": 842.0800134,
"pageNumber": 8
},
"geometry": {
"width": 595.2,
"height": 842.16
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0002630764355351,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.706754060986036,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"probabilities": {
"signature": 0.999772,
"logo": 0.000131,
"other": 9.7e-05,
"formula": 0.0
},
"label": "signature"
},
"position": {
"x1": 82.59443842482001,
"x2": 512.6365568843402,
"y1": 116.943736387567,
"y2": 725.0718450317352,
"pageNumber": 73
},
"geometry": {
"width": 430.04211845952017,
"height": 608.1281086441682
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.72236755521117,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7071571143427432,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"signature": 1.0,
"logo": 0.0,
"other": 0.0,
"formula": 0.0
},
"label": "signature"
},
"position": {
"x1": 328.20483600000006,
"x2": 393.94460940000005,
"y1": 175.1643178,
"y2": 203.92865619999998,
"pageNumber": 81
},
"geometry": {
"width": 65.73977339999999,
"height": 28.764338399999986
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.06142518774572455,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 2.2854609929078022,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"signature": 1.0,
"logo": 0.0,
"other": 0.0,
"formula": 0.0
},
"label": "signature"
},
"position": {
"x1": 136.5955818,
"x2": 224.72461859999999,
"y1": 175.1133172,
"y2": 203.97965680000001,
"pageNumber": 81
},
"geometry": {
"width": 88.1290368,
"height": 28.866339600000003
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.07124601312700823,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 3.053003533568904,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"logo": 1.0,
"signature": 0.0,
"other": 0.0,
"formula": 0.0
},
"label": "logo"
},
"position": {
"x1": 194.99126880000003,
"x2": 399.80967840000005,
"y1": 554.6597824,
"y2": 686.2413304,
"pageNumber": 81
},
"geometry": {
"width": 204.81840960000002,
"height": 131.581548
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.23189275858788796,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 1.5565891472868219,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 133.9945512,
"x2": 242.52382799999998,
"y1": 411.24609519999996,
"y2": 523.2434128,
"pageNumber": 90
},
"geometry": {
"width": 108.52927679999999,
"height": 111.99731760000003
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.15573364968831904,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.9690346083788703,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 133.5865464,
"x2": 242.3198256,
"y1": 274.972492,
"y2": 387.7858192,
"pageNumber": 90
},
"geometry": {
"width": 108.7332792,
"height": 112.8133272
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.15644678522591335,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.9638336347197106,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 246.19587120000003,
"x2": 356.5611696,
"y1": 400.84197279999995,
"y2": 519.3673672,
"pageNumber": 90
},
"geometry": {
"width": 110.3652984,
"height": 118.52539440000004
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.1615575178049721,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.9311531841652321,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 244.9718568,
"x2": 358.3971912,
"y1": 274.972492,
"y2": 387.7858192,
"pageNumber": 90
},
"geometry": {
"width": 113.4253344,
"height": 112.8133272
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.15978662903260646,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 1.0054249547920433,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 254.9679744,
"x2": 371.6573472,
"y1": 439.6024288,
"y2": 564.0438928,
"pageNumber": 91
},
"geometry": {
"width": 116.6893728,
"height": 124.441464
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.17021718544102565,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.9377049180327869,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 133.9945512,
"x2": 249.663912,
"y1": 443.07046959999997,
"y2": 687.2613424,
"pageNumber": 91
},
"geometry": {
"width": 115.66936080000002,
"height": 244.19087280000002
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.23739910530627284,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.4736842105263158,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"other": 1.0,
"signature": 0.0,
"formula": 0.0,
"logo": 0.0
},
"label": "other"
},
"position": {
"x1": 105.84222,
"x2": 374.870385,
"y1": 526.40545,
"y2": 687.05734,
"pageNumber": 92
},
"geometry": {
"width": 269.028165,
"height": 160.65188999999998
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.2936614851112628,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 1.6746031746031749,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
},
{
"classification": {
"probabilities": {
"logo": 0.788068,
"other": 0.152259,
"formula": 0.036883,
"signature": 0.02279
},
"label": "logo"
},
"position": {
"x1": 44.64999049990001,
"x2": 550.5759424999001,
"y1": 63.286004150029996,
"y2": 778.72242095003,
"pageNumber": 94
},
"geometry": {
"width": 505.92595200000005,
"height": 715.4364168000001
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.8498341845521462,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7071571143427431,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"probabilities": {
"signature": 0.998335,
"logo": 0.000955,
"other": 0.000703,
"formula": 7e-06
},
"label": "signature"
},
"position": {
"x1": 58.954005540029996,
"x2": 536.45979618003,
"y1": 83.94401504006001,
"y2": 758.05854296006,
"pageNumber": 95
},
"geometry": {
"width": 477.50579064000004,
"height": 674.11452792
},
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.8014221863697041,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7083452007974936,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
}
]
}

File diff suppressed because one or more lines are too long