Merge branch 'clari-30' into 'main'

CLARI-30 - identifier fix for clarifynd

See merge request fforesight/layout-parser!111
This commit is contained in:
Timo Bejan 2024-03-08 15:28:33 +01:00
commit d80231e4a9
8 changed files with 16 additions and 10 deletions

View File

@ -118,7 +118,7 @@ public class LayoutParsingPipeline {
imageServiceResponse, imageServiceResponse,
tableServiceResponse, tableServiceResponse,
visualLayoutParsingResponse, visualLayoutParsingResponse,
layoutParsingRequest.identifier().toString()); layoutParsingRequest.identifier());
log.info("Building document graph for {}", layoutParsingRequest.identifier()); log.info("Building document graph for {}", layoutParsingRequest.identifier());
@ -207,7 +207,7 @@ public class LayoutParsingPipeline {
ImageServiceResponse imageServiceResponse, ImageServiceResponse imageServiceResponse,
TableServiceResponse tableServiceResponse, TableServiceResponse tableServiceResponse,
VisualLayoutParsingResponse visualLayoutParsingResponse, VisualLayoutParsingResponse visualLayoutParsingResponse,
String identifier) { Map<String, String> identifier) {
PDDocument originDocument = openDocument(originFile); PDDocument originDocument = openDocument(originFile);
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));

View File

@ -11,6 +11,7 @@ import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.time.Duration; import java.time.Duration;
import java.util.List; import java.util.List;
import java.util.Map;
import org.apache.pdfbox.Loader; import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
@ -54,7 +55,7 @@ public class BdrJsonBuildTest extends AbstractTest {
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
file.toString())); Map.of("file",file.toString())));
} }

View File

@ -3,6 +3,7 @@ package com.knecon.fforesight.service.layoutparser.server;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -101,7 +102,7 @@ public class HeadlinesGoldStandardIntegrationTest {
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
filePath)); Map.of("file",filePath)));
var foundHeadlines = documentGraph.streamAllSubNodes() var foundHeadlines = documentGraph.streamAllSubNodes()
.map(SemanticNode::getHeadline) .map(SemanticNode::getHeadline)

View File

@ -4,6 +4,7 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Map;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -61,7 +62,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
filename.toFile().toString())); Map.of("file",filename.toFile().toString())));
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph); DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
ObjectMapper mapper = ObjectMapperFactory.create(); ObjectMapper mapper = ObjectMapperFactory.create();

View File

@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.server.graph;
import java.io.File; import java.io.File;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Map;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -58,7 +59,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
new ImageServiceResponse(), new ImageServiceResponse(),
tableResponse, tableResponse,
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
Path.of(fileName).getFileName().toFile().toString()); Map.of("file",Path.of(fileName).getFileName().toFile().toString()));
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService); LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE, classificationDocument); Document document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE, classificationDocument);

View File

@ -61,7 +61,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
new ImageServiceResponse(), new ImageServiceResponse(),
tableServiceResponse, tableServiceResponse,
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
"document"); Map.of("file","document"));
redactManagerClassificationService.classifyDocument(classificationDocument); redactManagerClassificationService.classifyDocument(classificationDocument);

View File

@ -7,6 +7,7 @@ import java.nio.file.Path;
import java.util.Collections; import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.pdfbox.Loader; import org.apache.pdfbox.Loader;
@ -105,14 +106,14 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
filename.toFile().toString())); Map.of("file",filename.toFile().toString())));
Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD, Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER_OLD,
layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD, layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER_OLD,
filename.toFile(), filename.toFile(),
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
filename.toFile().toString())); Map.of("file",filename.toFile().toString())));
DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore); DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter); DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure())) { if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure())) {

View File

@ -1,6 +1,7 @@
package com.knecon.fforesight.service.layoutparser.server.utils; package com.knecon.fforesight.service.layoutparser.server.utils;
import java.io.File; import java.io.File;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.ClassPathResource;
@ -31,7 +32,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
new TableServiceResponse(), new TableServiceResponse(),
new VisualLayoutParsingResponse(), new VisualLayoutParsingResponse(),
filename); Map.of("file",filename));
} }