move and fix layout tests from redaction-service
This commit is contained in:
parent
47fd8e05d1
commit
143ebee25e
@ -12,9 +12,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DocumentData {
|
||||
|
||||
DocumentPage[] pages;
|
||||
DocumentText[] atomicTextBlocks;
|
||||
DocumentPositions[] atomicPositionBlocks;
|
||||
DocumentPage[] documentPages;
|
||||
DocumentText[] documentTexts;
|
||||
DocumentPositions[] documentPositions;
|
||||
DocumentStructure documentStructure;
|
||||
|
||||
|
||||
|
||||
@ -72,9 +72,9 @@ public class LayoutParsingStorageService {
|
||||
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) {
|
||||
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentStructure());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getAtomicTextBlocks());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getAtomicPositionBlocks());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getPages());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getDocumentTexts());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getDocumentPositions());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getDocumentPages());
|
||||
}
|
||||
|
||||
|
||||
@ -103,9 +103,9 @@ public class LayoutParsingStorageService {
|
||||
|
||||
return DocumentData.builder()
|
||||
.documentStructure(tableOfContentsData)
|
||||
.atomicPositionBlocks(atomicPositionBlockData)
|
||||
.atomicTextBlocks(documentTextBlockData)
|
||||
.pages(documentPageData)
|
||||
.documentPositions(atomicPositionBlockData)
|
||||
.documentTexts(documentTextBlockData)
|
||||
.documentPages(documentPageData)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -41,9 +41,9 @@ public class DocumentDataMapper {
|
||||
List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
|
||||
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||
return DocumentData.builder()
|
||||
.atomicTextBlocks(documentTextBlockData.toArray(new DocumentText[0]))
|
||||
.atomicPositionBlocks(atomicPositionBlockData.toArray(new DocumentPositions[0]))
|
||||
.pages(documentPageData.toArray(new DocumentPage[0]))
|
||||
.documentTexts(documentTextBlockData.toArray(new DocumentText[0]))
|
||||
.documentPositions(atomicPositionBlockData.toArray(new DocumentPositions[0]))
|
||||
.documentPages(documentPageData.toArray(new DocumentPage[0]))
|
||||
.documentStructure(tableOfContentsData)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -39,13 +39,13 @@ public class DocumentGraphMapper {
|
||||
DocumentTree documentTree = new DocumentTree(document);
|
||||
Context context = new Context(documentData, documentTree);
|
||||
|
||||
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
context.pages.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
|
||||
document.setDocumentTree(context.documentTree);
|
||||
document.setPages(new HashSet<>(context.pages));
|
||||
document.setNumberOfPages(documentData.getPages().length);
|
||||
document.setNumberOfPages(documentData.getDocumentPages().length);
|
||||
|
||||
document.setTextBlock(document.getTextBlock());
|
||||
return document;
|
||||
@ -188,8 +188,8 @@ public class DocumentGraphMapper {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pages = new LinkedList<>();
|
||||
this.documentTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
|
||||
this.atomicPositionBlockData = Arrays.stream(documentData.getAtomicPositionBlocks()).toList();
|
||||
this.documentTextBlockData = Arrays.stream(documentData.getDocumentTexts()).toList();
|
||||
this.atomicPositionBlockData = Arrays.stream(documentData.getDocumentPositions()).toList();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,157 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.assertj.core.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.FilterType;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.ToString;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(HeadlinesGoldStandardIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||
public class HeadlinesGoldStandardIntegrationTest {
|
||||
|
||||
@MockBean
|
||||
private TenantsClient tenantsClient;
|
||||
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@Autowired
|
||||
private StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
private LayoutParsingPipeline layoutParsingPipeline;
|
||||
|
||||
|
||||
@Test
|
||||
public void testHeadlineDetection() {
|
||||
|
||||
List<Metrics> metrics = new ArrayList<>();
|
||||
metrics.add(getMetrics("files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
|
||||
"files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json"));
|
||||
metrics.add(getMetrics("files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
|
||||
"files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json"));
|
||||
metrics.add(getMetrics("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));
|
||||
|
||||
double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0);
|
||||
double recall = metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0);
|
||||
|
||||
System.out.println("Precision is: " + precision + " recall is: " + recall);
|
||||
|
||||
Assertions.assertThat(precision).isGreaterThanOrEqualTo(0.44f);
|
||||
Assertions.assertThat(recall).isGreaterThanOrEqualTo(0.69f);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Metrics getMetrics(String filePath, String redactionLogUrl) {
|
||||
|
||||
ClassPathResource redactionLogResource = new ClassPathResource(redactionLogUrl);
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(filePath);
|
||||
|
||||
Set<Headline> goldStandardHeadlines = new HashSet<>();
|
||||
var goldStandardLog = objectMapper.readValue(redactionLogResource.getInputStream(), RedactionLog.class);
|
||||
goldStandardLog.getRedactionLogEntry().removeIf(r -> !r.isRedacted() || r.getChanges().get(r.getChanges().size() - 1).getType().equals(ChangeType.REMOVED));
|
||||
goldStandardLog.getRedactionLogEntry().forEach(e -> goldStandardHeadlines.add(new Headline(e.getPositions().get(0).getPage(), e.getValue())));
|
||||
|
||||
Document documentGraph = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(pdfFileResource.getInputStream()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse());
|
||||
|
||||
var foundHeadlines = documentGraph.streamAllSubNodes()
|
||||
.map(SemanticNode::getHeadline)
|
||||
.distinct()
|
||||
.map(headlineNode -> new Headline(headlineNode.getPages().stream().findFirst().get().getNumber(), headlineNode.getTextBlock().getSearchText().stripTrailing()))
|
||||
.toList();
|
||||
|
||||
Set<Headline> correct = new HashSet<>();
|
||||
Set<Headline> missing;
|
||||
Set<Headline> falsePositive = new HashSet<>();
|
||||
for (Headline headline : foundHeadlines) {
|
||||
if (goldStandardHeadlines.contains(headline)) {
|
||||
correct.add(headline);
|
||||
} else {
|
||||
falsePositive.add(headline);
|
||||
}
|
||||
}
|
||||
|
||||
missing = goldStandardHeadlines.stream().filter(h -> !correct.contains(h)).collect(Collectors.toSet());
|
||||
|
||||
float precision = (float) correct.size() / (float) foundHeadlines.size();
|
||||
float recall = (float) correct.size() / ((float) correct.size() + (float) missing.size());
|
||||
|
||||
return new Metrics(precision, recall);
|
||||
}
|
||||
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class/*, StorageAutoConfiguration.class*/})
|
||||
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
private class Metrics {
|
||||
|
||||
private float precision;
|
||||
private float recall;
|
||||
|
||||
}
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
private class Headline {
|
||||
|
||||
private int page;
|
||||
private String headline;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -30,9 +30,9 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_structure", ".json")), documentData.getDocumentStructure());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_text", ".json")), documentData.getAtomicTextBlocks());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_positions", ".json")), documentData.getAtomicPositionBlocks());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_pages", ".json")), documentData.getPages());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_text", ".json")), documentData.getDocumentTexts());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_positions", ".json")), documentData.getDocumentPositions());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_pages", ".json")), documentData.getDocumentPages());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,14 +1,22 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentGraphMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.mapper.taas.TaasDocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.PropertiesMapper;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ -16,22 +24,64 @@ public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@Disabled
|
||||
public void testGraphMapping() {
|
||||
|
||||
Document document = buildGraph("files/crafted document");
|
||||
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
|
||||
String filename = "files/new/crafted document";
|
||||
|
||||
Document document = buildGraph(filename);
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(document);
|
||||
var researchDocumentData = TaasDocumentDataMapper.fromDocument(document);
|
||||
|
||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_PAGES" + ".json", documentData.getDocumentPages());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_TEXT" + ".json", documentData.getDocumentTexts());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_POSITIONS" + ".json", documentData.getDocumentPositions());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), filename + "_STRUCTURE" + ".json", documentData.getDocumentStructure());
|
||||
|
||||
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
|
||||
Document newDocumentGraph = DocumentGraphMapper.toDocumentGraph(documentData2);
|
||||
DocumentPage[] pageData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_PAGES" + ".json", DocumentPage[].class);
|
||||
DocumentText[] atomicTextBlockData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_TEXT" + ".json", DocumentText[].class);
|
||||
DocumentPositions[] atomicPositionBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
filename + "_POSITIONS" + ".json",
|
||||
DocumentPositions[].class);
|
||||
DocumentStructure documentTreeData = storageService.readJSONObject(TenantContext.getTenantId(), filename + "_STRUCTURE" + ".json", DocumentStructure.class);
|
||||
|
||||
assert document.toString().equals(newDocumentGraph.toString());
|
||||
assert document.getDocumentTree().toString().equals(newDocumentGraph.getDocumentTree().toString());
|
||||
DocumentData documentData2 = DocumentData.builder()
|
||||
.documentPages(pageData)
|
||||
.documentStructure(documentTreeData)
|
||||
.documentTexts(atomicTextBlockData)
|
||||
.documentPositions(atomicPositionBlockData)
|
||||
.build();
|
||||
Document newDocument = DocumentGraphMapper.toDocumentGraph(documentData2);
|
||||
|
||||
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(document));
|
||||
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(documentData));
|
||||
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(documentData2));
|
||||
assertTrue(allTablesHavePositiveNumberOfRowsAndColumns(newDocument));
|
||||
|
||||
assertEquals(document.toString(), newDocument.toString());
|
||||
assertEquals(document.getDocumentTree().toString(), newDocument.getDocumentTree().toString());
|
||||
}
|
||||
|
||||
|
||||
private static boolean allTablesHavePositiveNumberOfRowsAndColumns(Document document) {
|
||||
|
||||
return document.streamAllSubNodes()
|
||||
.filter(semanticNode -> semanticNode instanceof Table)
|
||||
.map(semanticNode -> (Table) semanticNode)
|
||||
.allMatch(tableNode -> tableNode.getNumberOfCols() > 0 && tableNode.getNumberOfRows() > 0);
|
||||
}
|
||||
|
||||
|
||||
private static boolean allTablesHavePositiveNumberOfRowsAndColumns(DocumentData documentData) {
|
||||
|
||||
return documentData.getDocumentStructure()
|
||||
.streamAllEntries()
|
||||
.filter(entryData -> entryData.getType().equals(NodeType.TABLE))
|
||||
.map(DocumentStructure.EntryData::getProperties)
|
||||
.map(properties -> {
|
||||
var builder = Table.builder();
|
||||
PropertiesMapper.parseTableProperties(properties, builder);
|
||||
return builder.build();
|
||||
})
|
||||
.allMatch(tableNode -> tableNode.getNumberOfCols() > 0 && tableNode.getNumberOfRows() > 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.model;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.storage.commons.properties.StorageProperties;
|
||||
import com.iqser.red.storage.commons.service.ObjectSerializer;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextDirection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class TextPositionSequenceTest {
|
||||
|
||||
private static final String TEXT_POSITION_SEQUENCE_AS_JSON = "{\n" //
|
||||
+ " \"page\": 1,\n" //
|
||||
+ " \"textPositions\": [],\n" //
|
||||
+ " \"dir\": 180.0,\n" //
|
||||
+ " \"rotation\": 0,\n" //
|
||||
+ " \"pageHeight\": 800,\n" //
|
||||
+ " \"pageWidth\": 600\n" //
|
||||
+ "}";
|
||||
|
||||
private final ObjectSerializer objectSerializer = new ObjectSerializer(new ObjectMapper(), new StorageProperties());
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testDeserializationWithJackson() {
|
||||
|
||||
TextPositionSequence textPositionSequence = objectSerializer.deserializeWithJackson(new ByteArrayInputStream(TEXT_POSITION_SEQUENCE_AS_JSON.getBytes(StandardCharsets.UTF_8)),
|
||||
TextPositionSequence.class);
|
||||
|
||||
assertPropertiesAfterJsonDeserialization(textPositionSequence);
|
||||
}
|
||||
|
||||
|
||||
private void assertPropertiesAfterJsonDeserialization(TextPositionSequence textPositionSequence) {
|
||||
|
||||
assertThat(textPositionSequence.getPage()).isEqualTo(1);
|
||||
assertThat(textPositionSequence.getTextPositions()).hasSize(0);
|
||||
assertThat(textPositionSequence.getDir()).isEqualTo(TextDirection.HALF_CIRCLE);
|
||||
assertThat(textPositionSequence.getRotation()).isEqualTo(0);
|
||||
assertThat(textPositionSequence.getPageHeight()).isEqualTo(800f);
|
||||
assertThat(textPositionSequence.getPageWidth()).isEqualTo(600f);
|
||||
}
|
||||
|
||||
|
||||
private Matrix createIdentityMatrix() {
|
||||
|
||||
return new Matrix();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,568 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.segmentation;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.FilterType;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.CvTableParsingAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.ImageServiceResponseAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.PdfParsingService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.Application;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.FileSystemBackedStorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(PdfSegmentationServiceTest.TestConfiguration.class)
|
||||
public class PdfSegmentationServiceTest {
|
||||
|
||||
@Autowired
|
||||
private PdfParsingService pdfParsingService;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@Autowired
|
||||
private RedactManagerClassificationService redactManagerClassificationService;
|
||||
|
||||
@Autowired
|
||||
private CvTableParsingAdapter cvTableParsingAdapter;
|
||||
|
||||
@Autowired
|
||||
private ImageServiceResponseAdapter imageServiceResponseAdapter;
|
||||
|
||||
@Autowired
|
||||
private SectionsBuilderService sectionsBuilderService;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
||||
|
||||
ClassificationDocument classificationDocument = pdfParsingService.parseDocument(LayoutParsingType.REDACT_MANAGER,
|
||||
originDocument,
|
||||
cvTableParsingAdapter.buildCvParsedTablesPerPage(new TableServiceResponse()),
|
||||
imageServiceResponseAdapter.buildClassifiedImagesPerPage(new ImageServiceResponse()));
|
||||
|
||||
redactManagerClassificationService.classifyDocument(classificationDocument);
|
||||
|
||||
sectionsBuilderService.buildSections(classificationDocument);
|
||||
|
||||
return classificationDocument;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testMapping() {
|
||||
|
||||
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
|
||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> images = new HashMap<>();
|
||||
imageServiceResponse.getData()
|
||||
.forEach(imageMetadata -> images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
|
||||
.add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
|
||||
imageMetadata.getPosition().getY1(),
|
||||
imageMetadata.getGeometry().getWidth(),
|
||||
imageMetadata.getGeometry().getHeight()),
|
||||
ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
|
||||
imageMetadata.isAlpha(),
|
||||
imageMetadata.getPosition().getPageNumber())));
|
||||
|
||||
System.out.println("object");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
|
||||
assertThat(table.getColCount()).isEqualTo(6);
|
||||
assertThat(table.getRowCount()).isEqualTo(13);
|
||||
assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableExtraction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
|
||||
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMultiPageMetadataPropagation() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(9);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(5);
|
||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
|
||||
assertThat(secondTable.getColCount()).isEqualTo(9);
|
||||
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
|
||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testHeaderCellsForRotatedTable() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList()).get(1);
|
||||
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc56Page170() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 4);
|
||||
|
||||
validateTable(document, 0, 1, 1, 0, 0);
|
||||
validateTable(document, 1, 2, 2, 0, 0);
|
||||
validateTable(document, 2, 7, 20, 0, 140);
|
||||
validateTable(document, 3, 8, 31, 0, 170);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testVV931175Page1() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
|
||||
validateTable(document, 0, 8, 8, 0, 2);
|
||||
|
||||
List<List<String>> values = Arrays.asList(Arrays.asList("Annex point Reference within DAR/RAR",
|
||||
"Author, date",
|
||||
"Study title",
|
||||
"Analytical method Author, date, No.",
|
||||
"Technique, LOQ of the method, validated working range",
|
||||
"Method meets analytical validation criteria",
|
||||
"Remarks (in case validation criteria are not met)",
|
||||
"Acceptability of the method"),
|
||||
Arrays.asList("",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
|
||||
"",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies",
|
||||
"Part (a) Methods in soil, water, sediment, air and any additional matrices used in support of environmental fate studies"),
|
||||
Arrays.asList("CA 7.1.2.1.1 DAR (2009)",
|
||||
"Evans P.G. 2001 TMJ4569B, VV-323245",
|
||||
"Azoxystrobin Laboratory Degradation Study in Three Soil Types, Sampled from Holland and the United Kingdom",
|
||||
"Method: RAM 269 Johnson R.I., Tummon O.J., Earl M. 1995 RJ1864B, VV-377731 Johnson R.I., Tummon O.J., Earl M. 1998 RAM 269/02, VV-124072 Johnson R.I., Tummon O.J., Earl M. 2000 RAM 269/03, VV-123986 Validation: Robinson N.J. 2001 TMJ4617B, VV-895845 in a Trial Carried",
|
||||
"LC-MS/MS LOQ: 0.01 mg/kg (R401553 (SYN50165 7), R402173 (SYN501114 )) or 0.02 mg/kg (azoxystrobin, R230310, R234886) Working range: 0.02-1.0 or 0.01-0.5 mg/kg (depending on analyte) Other supporting quantificati on methods: HPLC-UV GC-MSD",
|
||||
"Y",
|
||||
"N/A",
|
||||
"Y"));
|
||||
|
||||
validateTable(document, 0, values);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc27Page6() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 4);
|
||||
|
||||
validateTable(document, 0, 3, 2, 0, 0);
|
||||
validateTable(document, 1, 3, 2, 0, 0);
|
||||
validateTable(document, 2, 3, 3, 0, 0);
|
||||
validateTable(document, 3, 3, 3, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDocA20622APartB9Page185() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 5, 5, 0, 23);
|
||||
validateTable(document, 1, 11, 9, 0, 36);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDocA20622APartB7Page123() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 6);
|
||||
|
||||
validateTable(document, 0, 2, 1, 0, 0);
|
||||
validateTable(document, 1, 2, 1, 0, 0);
|
||||
validateTable(document, 2, 2, 5, 0, 0);
|
||||
validateTable(document, 3, 2, 5, 0, 0);
|
||||
validateTable(document, 4, 2, 4, 0, 0);
|
||||
validateTable(document, 5, 2, 1, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc77Page111() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 3);
|
||||
|
||||
validateTable(document, 0, 7, 9, 0, 0);
|
||||
validateTable(document, 1, 2, 1, 0, 0);
|
||||
validateTable(document, 2, 2, 10, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc95Page532() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
validateTable(document, 0, 9, 9, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc52Page175() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
|
||||
validateTable(document, 0, 9, 5, 6, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc52Page174() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
validateTable(document, 0, 9, 6, 7, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc19Page35() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
validateTable(document, 0, 10, 6, 0, 1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc19Page161() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
validateTable(document, 0, 2, 2, 0, 0);
|
||||
validateTable(document, 1, 1, 1, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc47Page30() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||
"files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 7, 8, 1, 0);
|
||||
validateTable(document, 1, 7, 8, 1, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc49Page61() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||
"files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 4, 17, 0, 0);
|
||||
validateTable(document, 1, 7, 12, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc81Page54() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 5, 14, 4, 0);
|
||||
validateTable(document, 1, 7, 12, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc88Page134() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 5, 17, 3, 0);
|
||||
validateTable(document, 1, 5, 16, 2, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDocThiabendazolePage18() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 4);
|
||||
|
||||
validateTable(document, 0, 4, 4, 0, 0);
|
||||
validateTable(document, 1, 1, 1, 0, 0);
|
||||
validateTable(document, 2, 2, 3, 0, 0);
|
||||
validateTable(document, 3, 1, 1, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc15Page18() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
|
||||
validateTable(document, 0, 11, 8, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc28Page23() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||
"files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 2);
|
||||
|
||||
validateTable(document, 0, 6, 8, 0, 2);
|
||||
validateTable(document, 1, 6, 8, 0, 1);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc24Page17() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
|
||||
validateTable(document, 0, 9, 5, 2, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoc30Page5() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf");
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getInputStream()));
|
||||
|
||||
validateTableSize(document, 1);
|
||||
|
||||
validateTable(document, 0, 3, 5, 0, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {
|
||||
|
||||
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
|
||||
List<List<Cell>> rows = table.getRows();
|
||||
int emptyCellsFoundFound = rows.stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().equals("")).toList().size();
|
||||
|
||||
assertThat(emptyCellsFoundFound).isEqualTo(emptyCellsCountCorrect + emptyCellsCountIncorrect);
|
||||
|
||||
assertThat(table.getColCount()).isEqualTo(colCount);
|
||||
assertThat(table.getRowCount()).isEqualTo(rowCount);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void validateTable(ClassificationDocument document, int tableIndex, List<List<String>> values) {
|
||||
|
||||
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);
|
||||
List<List<Cell>> rows = table.getRows();
|
||||
|
||||
List<Cell> rowsFlattened = rows.stream().flatMap(List::stream).toList();
|
||||
List<String> valuesFlattened = values.stream().flatMap(List::stream).toList();
|
||||
|
||||
for (int i = 0; i < valuesFlattened.size(); i++) {
|
||||
Cell cell = rowsFlattened.get(i);
|
||||
String value = valuesFlattened.get(i);
|
||||
assertThat(cell.toString()).isEqualTo(value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void validateTableSize(ClassificationDocument document, int tableSize) {
|
||||
|
||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().size()).isEqualTo(tableSize);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -12,17 +12,15 @@ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.Application;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
@ -151,13 +149,6 @@ public class BaseTest {
|
||||
@ComponentScan("com.knecon.fforesight.service.layoutparser")
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inMemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,686 @@
|
||||
{
|
||||
"dossierId": "f889853e-4bf8-49a9-aae5-c38605c6ef40",
|
||||
"fileId": "22ef63e29bb2a27db8497272336f6b32",
|
||||
"data": [
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 1.0,
|
||||
"signature": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 89.88,
|
||||
"x2": 274.20000000000005,
|
||||
"y1": 716.24,
|
||||
"y2": 770.0,
|
||||
"pageNumber": 1
|
||||
},
|
||||
"geometry": {
|
||||
"width": 184.32000000000005,
|
||||
"height": 53.75999999999999
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.14298074612038092,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 3.42857142857143,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999968,
|
||||
"logo": 1.6e-05,
|
||||
"other": 1.6e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 3
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0000782051152328,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999872,
|
||||
"other": 7.9e-05,
|
||||
"logo": 4.8e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 7
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0000782051152328,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.996366,
|
||||
"other": 0.003607,
|
||||
"logo": 2.3e-05,
|
||||
"formula": 4e-06
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": -0.10000600000000001,
|
||||
"x2": 595.099994,
|
||||
"y1": -0.07998660000000002,
|
||||
"y2": 842.0800134,
|
||||
"pageNumber": 8
|
||||
},
|
||||
"geometry": {
|
||||
"width": 595.2,
|
||||
"height": 842.16
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 1.0002630764355351,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.706754060986036,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.999772,
|
||||
"logo": 0.000131,
|
||||
"other": 9.7e-05,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 82.59443842482001,
|
||||
"x2": 512.6365568843402,
|
||||
"y1": 116.943736387567,
|
||||
"y2": 725.0718450317352,
|
||||
"pageNumber": 73
|
||||
},
|
||||
"geometry": {
|
||||
"width": 430.04211845952017,
|
||||
"height": 608.1281086441682
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.72236755521117,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7071571143427432,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 328.20483600000006,
|
||||
"x2": 393.94460940000005,
|
||||
"y1": 175.1643178,
|
||||
"y2": 203.92865619999998,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 65.73977339999999,
|
||||
"height": 28.764338399999986
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.06142518774572455,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 2.2854609929078022,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 1.0,
|
||||
"logo": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 136.5955818,
|
||||
"x2": 224.72461859999999,
|
||||
"y1": 175.1133172,
|
||||
"y2": 203.97965680000001,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 88.1290368,
|
||||
"height": 28.866339600000003
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.07124601312700823,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 3.053003533568904,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 1.0,
|
||||
"signature": 0.0,
|
||||
"other": 0.0,
|
||||
"formula": 0.0
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 194.99126880000003,
|
||||
"x2": 399.80967840000005,
|
||||
"y1": 554.6597824,
|
||||
"y2": 686.2413304,
|
||||
"pageNumber": 81
|
||||
},
|
||||
"geometry": {
|
||||
"width": 204.81840960000002,
|
||||
"height": 131.581548
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.23189275858788796,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.5565891472868219,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.9945512,
|
||||
"x2": 242.52382799999998,
|
||||
"y1": 411.24609519999996,
|
||||
"y2": 523.2434128,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 108.52927679999999,
|
||||
"height": 111.99731760000003
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15573364968831904,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9690346083788703,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.5865464,
|
||||
"x2": 242.3198256,
|
||||
"y1": 274.972492,
|
||||
"y2": 387.7858192,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 108.7332792,
|
||||
"height": 112.8133272
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15644678522591335,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9638336347197106,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 246.19587120000003,
|
||||
"x2": 356.5611696,
|
||||
"y1": 400.84197279999995,
|
||||
"y2": 519.3673672,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 110.3652984,
|
||||
"height": 118.52539440000004
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.1615575178049721,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9311531841652321,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 244.9718568,
|
||||
"x2": 358.3971912,
|
||||
"y1": 274.972492,
|
||||
"y2": 387.7858192,
|
||||
"pageNumber": 90
|
||||
},
|
||||
"geometry": {
|
||||
"width": 113.4253344,
|
||||
"height": 112.8133272
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.15978662903260646,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.0054249547920433,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 254.9679744,
|
||||
"x2": 371.6573472,
|
||||
"y1": 439.6024288,
|
||||
"y2": 564.0438928,
|
||||
"pageNumber": 91
|
||||
},
|
||||
"geometry": {
|
||||
"width": 116.6893728,
|
||||
"height": 124.441464
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.17021718544102565,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.9377049180327869,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 133.9945512,
|
||||
"x2": 249.663912,
|
||||
"y1": 443.07046959999997,
|
||||
"y2": 687.2613424,
|
||||
"pageNumber": 91
|
||||
},
|
||||
"geometry": {
|
||||
"width": 115.66936080000002,
|
||||
"height": 244.19087280000002
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.23739910530627284,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.4736842105263158,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"other": 1.0,
|
||||
"signature": 0.0,
|
||||
"formula": 0.0,
|
||||
"logo": 0.0
|
||||
},
|
||||
"label": "other"
|
||||
},
|
||||
"position": {
|
||||
"x1": 105.84222,
|
||||
"x2": 374.870385,
|
||||
"y1": 526.40545,
|
||||
"y2": 687.05734,
|
||||
"pageNumber": 92
|
||||
},
|
||||
"geometry": {
|
||||
"width": 269.028165,
|
||||
"height": 160.65188999999998
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.2936614851112628,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.6746031746031749,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"logo": 0.788068,
|
||||
"other": 0.152259,
|
||||
"formula": 0.036883,
|
||||
"signature": 0.02279
|
||||
},
|
||||
"label": "logo"
|
||||
},
|
||||
"position": {
|
||||
"x1": 44.64999049990001,
|
||||
"x2": 550.5759424999001,
|
||||
"y1": 63.286004150029996,
|
||||
"y2": 778.72242095003,
|
||||
"pageNumber": 94
|
||||
},
|
||||
"geometry": {
|
||||
"width": 505.92595200000005,
|
||||
"height": 715.4364168000001
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.8498341845521462,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7071571143427431,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"classification": {
|
||||
"probabilities": {
|
||||
"signature": 0.998335,
|
||||
"logo": 0.000955,
|
||||
"other": 0.000703,
|
||||
"formula": 7e-06
|
||||
},
|
||||
"label": "signature"
|
||||
},
|
||||
"position": {
|
||||
"x1": 58.954005540029996,
|
||||
"x2": 536.45979618003,
|
||||
"y1": 83.94401504006001,
|
||||
"y2": 758.05854296006,
|
||||
"pageNumber": 95
|
||||
},
|
||||
"geometry": {
|
||||
"width": 477.50579064000004,
|
||||
"height": 674.11452792
|
||||
},
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.8014221863697041,
|
||||
"tooLarge": true,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 0.7083452007974936,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": false
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user