diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/textblock/AtomicTextBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/textblock/AtomicTextBlock.java index 9db1955..c1bde81 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/textblock/AtomicTextBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/graph/textblock/AtomicTextBlock.java @@ -208,7 +208,7 @@ public class AtomicTextBlock implements TextBlock { List rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary)) .stream() .map(this::getPositions) - .map(RectangleTransformations::rectangleUnionWithGaps) + .map(RectangleTransformations::rectangleBBoxWithGaps) .flatMap(Collection::stream) .toList(); Map> rectanglePerLinePerPage = new HashMap<>(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/RectangleTransformations.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/RectangleTransformations.java index d5617a3..8cd8931 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/RectangleTransformations.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/RectangleTransformations.java @@ -1,6 +1,5 @@ package com.knecon.fforesight.service.layoutparser.processor.utils; -import java.awt.geom.Area; import java.awt.geom.Rectangle2D; import java.awt.geom.RectangularShape; import java.util.Collections; @@ -19,11 +18,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.AtomicTextBlock; +import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; + public class RectangleTransformations { - public static PDRectangle toPDRectangleUnion(List rectangles) { + public static PDRectangle toPDRectangleBBox(List rectangles) { - Rectangle2D rectangle2D = RectangleTransformations.bBoxUnionRectangle(rectangles); + Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles); PDRectangle annotationPosition = new PDRectangle(); annotationPosition.setLowerLeftX((float) rectangle2D.getMinX()); @@ -34,15 +36,15 @@ public class RectangleTransformations { } - public static Rectangle2D bBoxUnionAtomicTextBlock(List atomicTextBlocks) { + public static Rectangle2D atomicTextBlockBBox(List atomicTextBlocks) { - return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DUnion()); + return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector()); } - public static Rectangle2D bBoxUnionRectangle(List rectangles) { + public static Rectangle2D rectangleBBox(List rectangles) { - return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DUnion()); + return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector()); } @@ -64,9 +66,9 @@ public class RectangleTransformations { } - public static Rectangle2D rectangleUnion(List rectangle2DList) { + public static Rectangle2D rectangle2DBBox(List rectangle2DList) { - return rectangle2DList.stream().collect(new Rectangle2DUnion()); + return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector()); } @@ -76,7 +78,7 @@ public class RectangleTransformations { * @param rectangle2DList A list of rectangles to combine * @return A list of rectangles which are combined if they are closer than the split threshold */ - public static List rectangleUnionWithGaps(List rectangle2DList) { + public static List rectangleBBoxWithGaps(List rectangle2DList) { if (rectangle2DList.isEmpty()) { return Collections.emptyList(); @@ -98,49 +100,87 @@ public class RectangleTransformations { previousRectangle = currentRectangle; } } - return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangleUnion).toList(); + return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangle2DBBox).toList(); } - private static class Rectangle2DUnion implements Collector { + private static class Rectangle2DBBoxCollector implements Collector { @Override - public Supplier supplier() { + public Supplier supplier() { - return Area::new; + return BBox::new; } @Override - public BiConsumer accumulator() { + public BiConsumer accumulator() { - return (area, rectangle2D) -> area.add(new Area(rectangle2D)); + return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY()); } @Override - public BinaryOperator combiner() { + public BinaryOperator combiner() { - return (area1, area2) -> { - area1.add(area2); - return area1; - }; + return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX), + Math.min(b1.lowerLeftY, b2.lowerLeftY), + Math.max(b1.upperRightX, b2.upperRightX), + Math.max(b1.upperRightY, b2.upperRightY)); } @Override - public Function finisher() { + public Function finisher() { - return Area::getBounds2D; + return bb -> new Rectangle2D.Double(bb.lowerLeftX, bb.lowerLeftY, bb.upperRightX - bb.lowerLeftX, bb.upperRightY - bb.lowerLeftY); } @Override public Set characteristics() { - return Set.of(Characteristics.CONCURRENT, Characteristics.UNORDERED); + return Set.of(Characteristics.UNORDERED); + } + + + @AllArgsConstructor + @NoArgsConstructor + private static class BBox { + + Double lowerLeftX; + Double lowerLeftY; + Double upperRightX; + Double upperRightY; + + + public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) { + + if (this.lowerLeftX == null) { + this.lowerLeftX = lowerLeftX; + } else if (this.lowerLeftX > lowerLeftX) { + this.lowerLeftX = lowerLeftX; + } + if (this.lowerLeftY == null) { + this.lowerLeftY = lowerLeftY; + } else if (this.lowerLeftY > lowerLeftY) { + this.lowerLeftY = lowerLeftY; + } + if (this.upperRightX == null) { + this.upperRightX = upperRightX; + } else if (this.upperRightX < upperRightX) { + this.upperRightX = upperRightX; + } + if (this.upperRightY == null) { + this.upperRightY = upperRightY; + } else if (this.upperRightY < upperRightY) { + this.upperRightY = upperRightY; + } + + } + } } -} +} \ No newline at end of file diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java index d74d2e0..ae9ecfc 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java @@ -35,12 +35,16 @@ public class BuildDocumentGraphTest extends BaseTest { @SneakyThrows protected Document buildGraph(String filename) { - if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) { - prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json"); - } else { - prepareStorage(filename + ".pdf"); + if (!filename.endsWith(".pdf")) { + filename = filename + ".pdf"; } - ClassPathResource fileResource = new ClassPathResource(filename + ".pdf"); + + if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) { + prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json"); + } else { + prepareStorage(filename); + } + ClassPathResource fileResource = new ClassPathResource(filename); try (InputStream inputStream = fileResource.getInputStream()) { PDDocument pdDocument = Loader.loadPDF(inputStream); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentDataTests.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentDataTests.java index 69f98ff..179dcfc 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentDataTests.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentDataTests.java @@ -1,5 +1,45 @@ package com.knecon.fforesight.service.layoutparser.server.graph; -public class DocumentDataTests { +import java.io.File; +import java.io.FileOutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import org.junit.jupiter.api.Test; +import org.springframework.core.io.ClassPathResource; + +import com.iqser.red.commons.jackson.ObjectMapperFactory; +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData; +import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper; + +import lombok.SneakyThrows; + +public class DocumentDataTests extends BuildDocumentGraphTest{ + @Test + @SneakyThrows + public void createDocumentDataForAllFiles() { + + String outPath = "/tmp/document_data_output_layoutparser"; + + ClassPathResource resource = new ClassPathResource("files"); + List pdfFileNames = Files.walk(resource.getFile().toPath()) + .filter(path -> path.getFileName().toString().endsWith(".pdf")) + .map(Path::toAbsolutePath) + .map(Path::toString) + .toList(); + System.out.printf("%d Files found%n", pdfFileNames.size()); + for (int i = 0; i < pdfFileNames.size(); i++) { + System.out.printf("%d/%d: %s%n", i, pdfFileNames.size(), pdfFileNames.get(i)); + } + for (String pdfFileName : pdfFileNames) { + System.out.println(pdfFileName); + DocumentData documentData = DocumentDataMapper.toDocumentData(buildGraph(resource.getFile().toPath().getParent().relativize(Path.of(pdfFileName)).toString())); + File outputFile = Path.of(outPath).resolve(resource.getFile().toPath().relativize(Path.of(pdfFileName))).toFile(); + outputFile.toPath().getParent().toFile().mkdirs(); + try (var out = new FileOutputStream(outputFile.toString().replace(".pdf", ".json"))) { + ObjectMapperFactory.create().writeValue(out, documentData); + } + } + } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index 194aa53..85ef429 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -20,7 +20,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest { @Disabled public void writeJsonForFileTest() { - writeJsons("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); + writeJsons("files/216"); } @SneakyThrows