RED-7081: getBBox() Performance Improvement

This commit is contained in:
Kilian Schuettler 2023-07-04 20:19:53 +02:00 committed by Timo Bejan
parent 788613c92e
commit 15a6d46f5c
5 changed files with 117 additions and 33 deletions

View File

@ -208,7 +208,7 @@ public class AtomicTextBlock implements TextBlock {
List<Rectangle2D> rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary))
.stream()
.map(this::getPositions)
.map(RectangleTransformations::rectangleUnionWithGaps)
.map(RectangleTransformations::rectangleBBoxWithGaps)
.flatMap(Collection::stream)
.toList();
Map<Page, List<Rectangle2D>> rectanglePerLinePerPage = new HashMap<>();

View File

@ -1,6 +1,5 @@
package com.knecon.fforesight.service.layoutparser.processor.utils;
import java.awt.geom.Area;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
import java.util.Collections;
@ -19,11 +18,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.AtomicTextBlock;
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
public class RectangleTransformations {
public static PDRectangle toPDRectangleUnion(List<Rectangle> rectangles) {
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {
Rectangle2D rectangle2D = RectangleTransformations.bBoxUnionRectangle(rectangles);
Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles);
PDRectangle annotationPosition = new PDRectangle();
annotationPosition.setLowerLeftX((float) rectangle2D.getMinX());
@ -34,15 +36,15 @@ public class RectangleTransformations {
}
public static Rectangle2D bBoxUnionAtomicTextBlock(List<AtomicTextBlock> atomicTextBlocks) {
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DUnion());
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
}
public static Rectangle2D bBoxUnionRectangle(List<Rectangle> rectangles) {
public static Rectangle2D rectangleBBox(List<Rectangle> rectangles) {
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DUnion());
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector());
}
@ -64,9 +66,9 @@ public class RectangleTransformations {
}
public static Rectangle2D rectangleUnion(List<Rectangle2D> rectangle2DList) {
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
return rectangle2DList.stream().collect(new Rectangle2DUnion());
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
}
@ -76,7 +78,7 @@ public class RectangleTransformations {
* @param rectangle2DList A list of rectangles to combine
* @return A list of rectangles which are combined if they are closer than the split threshold
*/
public static List<Rectangle2D> rectangleUnionWithGaps(List<Rectangle2D> rectangle2DList) {
public static List<Rectangle2D> rectangleBBoxWithGaps(List<Rectangle2D> rectangle2DList) {
if (rectangle2DList.isEmpty()) {
return Collections.emptyList();
@ -98,49 +100,87 @@ public class RectangleTransformations {
previousRectangle = currentRectangle;
}
}
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangleUnion).toList();
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangle2DBBox).toList();
}
private static class Rectangle2DUnion implements Collector<Rectangle2D, Area, Rectangle2D> {
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
@Override
public Supplier<Area> supplier() {
public Supplier<BBox> supplier() {
return Area::new;
return BBox::new;
}
@Override
public BiConsumer<Area, Rectangle2D> accumulator() {
public BiConsumer<BBox, Rectangle2D> accumulator() {
return (area, rectangle2D) -> area.add(new Area(rectangle2D));
return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY());
}
@Override
public BinaryOperator<Area> combiner() {
public BinaryOperator<BBox> combiner() {
return (area1, area2) -> {
area1.add(area2);
return area1;
};
return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX),
Math.min(b1.lowerLeftY, b2.lowerLeftY),
Math.max(b1.upperRightX, b2.upperRightX),
Math.max(b1.upperRightY, b2.upperRightY));
}
@Override
public Function<Area, Rectangle2D> finisher() {
public Function<BBox, Rectangle2D> finisher() {
return Area::getBounds2D;
return bb -> new Rectangle2D.Double(bb.lowerLeftX, bb.lowerLeftY, bb.upperRightX - bb.lowerLeftX, bb.upperRightY - bb.lowerLeftY);
}
@Override
public Set<Characteristics> characteristics() {
return Set.of(Characteristics.CONCURRENT, Characteristics.UNORDERED);
return Set.of(Characteristics.UNORDERED);
}
/**
 * Mutable accumulator for an axis-aligned bounding box, described by its lower-left and
 * upper-right corners. All four coordinates are {@code null} until the first rectangle
 * has been added.
 */
@AllArgsConstructor
@NoArgsConstructor
private static class BBox {

    Double lowerLeftX;
    Double lowerLeftY;
    Double upperRightX;
    Double upperRightY;


    /**
     * Grows this box so that it also covers the rectangle given by its two corners.
     * A coordinate is taken over when nothing has been stored yet, or when the new value
     * lies further out than the stored one (smaller for the lower-left corner, larger for
     * the upper-right corner).
     *
     * @param lowerLeftX  minimum x of the rectangle to include
     * @param lowerLeftY  minimum y of the rectangle to include
     * @param upperRightX maximum x of the rectangle to include
     * @param upperRightY maximum y of the rectangle to include
     */
    public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) {

        // The null check short-circuits, so the comparison never unboxes a null value.
        if (this.lowerLeftX == null || this.lowerLeftX > lowerLeftX) {
            this.lowerLeftX = lowerLeftX;
        }
        if (this.lowerLeftY == null || this.lowerLeftY > lowerLeftY) {
            this.lowerLeftY = lowerLeftY;
        }
        if (this.upperRightX == null || this.upperRightX < upperRightX) {
            this.upperRightX = upperRightX;
        }
        if (this.upperRightY == null || this.upperRightY < upperRightY) {
            this.upperRightY = upperRightY;
        }
    }

}
}
}
}

View File

@ -35,12 +35,16 @@ public class BuildDocumentGraphTest extends BaseTest {
@SneakyThrows
protected Document buildGraph(String filename) {
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
} else {
prepareStorage(filename + ".pdf");
if (!filename.endsWith(".pdf")) {
filename = filename + ".pdf";
}
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
} else {
prepareStorage(filename);
}
ClassPathResource fileResource = new ClassPathResource(filename);
try (InputStream inputStream = fileResource.getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);

View File

@ -1,5 +1,45 @@
package com.knecon.fforesight.service.layoutparser.server.graph;
public class DocumentDataTests {
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
import lombok.SneakyThrows;
/**
 * Builds the document graph for every PDF found below the {@code files} classpath
 * directory and writes the mapped {@link DocumentData} as JSON to a temporary folder.
 */
public class DocumentDataTests extends BuildDocumentGraphTest {

    /**
     * Walks the {@code files} classpath directory, lists all PDFs on stdout and then, for
     * each PDF, builds the graph, maps it to {@link DocumentData} and serializes it to
     * {@code /tmp/document_data_output_layoutparser}, mirroring the relative directory
     * layout and swapping the {@code .pdf} extension for {@code .json}.
     */
    @Test
    @SneakyThrows
    public void createDocumentDataForAllFiles() {

        String outPath = "/tmp/document_data_output_layoutparser";
        ClassPathResource resource = new ClassPathResource("files");
        // Resolve the root once; every relative path below is derived from it.
        Path filesRoot = resource.getFile().toPath();

        List<String> pdfFileNames;
        // Files.walk keeps directory handles open until the stream is closed,
        // so it has to live in a try-with-resources block.
        try (var paths = Files.walk(filesRoot)) {
            pdfFileNames = paths.filter(path -> path.getFileName().toString().endsWith(".pdf"))
                    .map(Path::toAbsolutePath)
                    .map(Path::toString)
                    .toList();
        }

        System.out.printf("%d Files found%n", pdfFileNames.size());
        for (int i = 0; i < pdfFileNames.size(); i++) {
            System.out.printf("%d/%d: %s%n", i, pdfFileNames.size(), pdfFileNames.get(i));
        }

        for (String pdfFileName : pdfFileNames) {
            System.out.println(pdfFileName);
            Path pdfPath = Path.of(pdfFileName);

            // buildGraph receives the path relative to the parent of the "files" directory.
            DocumentData documentData = DocumentDataMapper.toDocumentData(buildGraph(filesRoot.getParent().relativize(pdfPath).toString()));

            // The output mirrors the layout below "files" inside the output folder.
            File outputFile = Path.of(outPath).resolve(filesRoot.relativize(pdfPath)).toFile();
            outputFile.toPath().getParent().toFile().mkdirs();
            try (var out = new FileOutputStream(outputFile.toString().replace(".pdf", ".json"))) {
                ObjectMapperFactory.create().writeValue(out, documentData);
            }
        }
    }

}

View File

@ -20,7 +20,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
@Disabled
public void writeJsonForFileTest() {
writeJsons("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
writeJsons("files/216");
}
@SneakyThrows