RED-7081: getBBox() Performance Improvement
This commit is contained in:
parent
788613c92e
commit
15a6d46f5c
@ -208,7 +208,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
List<Rectangle2D> rectanglesPerLine = stringBoundary.split(getAllLineBreaksInBoundary(stringBoundary))
|
||||
.stream()
|
||||
.map(this::getPositions)
|
||||
.map(RectangleTransformations::rectangleUnionWithGaps)
|
||||
.map(RectangleTransformations::rectangleBBoxWithGaps)
|
||||
.flatMap(Collection::stream)
|
||||
.toList();
|
||||
Map<Page, List<Rectangle2D>> rectanglePerLinePerPage = new HashMap<>();
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.utils;
|
||||
|
||||
import java.awt.geom.Area;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
import java.util.Collections;
|
||||
@ -19,11 +18,14 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlo
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.textblock.AtomicTextBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
public class RectangleTransformations {
|
||||
|
||||
public static PDRectangle toPDRectangleUnion(List<Rectangle> rectangles) {
|
||||
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
Rectangle2D rectangle2D = RectangleTransformations.bBoxUnionRectangle(rectangles);
|
||||
Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles);
|
||||
|
||||
PDRectangle annotationPosition = new PDRectangle();
|
||||
annotationPosition.setLowerLeftX((float) rectangle2D.getMinX());
|
||||
@ -34,15 +36,15 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D bBoxUnionAtomicTextBlock(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
/**
 * Computes one rectangle enclosing the positions of all given atomic text blocks.
 *
 * @param atomicTextBlocks text blocks whose {@code getPositions()} rectangles are flattened into one stream
 * @return the rectangle obtained by collecting every position with {@code Rectangle2DBBoxCollector}
 */
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DUnion());
|
||||
return atomicTextBlocks.stream().flatMap(atomicTextBlock -> atomicTextBlock.getPositions().stream()).collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D bBoxUnionRectangle(List<Rectangle> rectangles) {
|
||||
/**
 * Computes one rectangle enclosing all given redaction-log rectangles.
 *
 * @param rectangles rectangles that are each mapped through {@code RectangleTransformations::toRectangle2D}
 * @return the rectangle obtained by collecting the mapped rectangles with {@code Rectangle2DBBoxCollector}
 */
public static Rectangle2D rectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DUnion());
|
||||
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -64,9 +66,9 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangleUnion(List<Rectangle2D> rectangle2DList) {
|
||||
/**
 * Computes one rectangle enclosing all rectangles of the given list.
 *
 * @param rectangle2DList rectangles to combine
 * @return the rectangle obtained by collecting the list with {@code Rectangle2DBBoxCollector}
 */
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
|
||||
|
||||
return rectangle2DList.stream().collect(new Rectangle2DUnion());
|
||||
return rectangle2DList.stream().collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +78,7 @@ public class RectangleTransformations {
|
||||
* @param rectangle2DList A list of rectangles to combine
|
||||
* @return A list of rectangles which are combined if they are closer than the split threshold
|
||||
*/
|
||||
public static List<Rectangle2D> rectangleUnionWithGaps(List<Rectangle2D> rectangle2DList) {
|
||||
public static List<Rectangle2D> rectangleBBoxWithGaps(List<Rectangle2D> rectangle2DList) {
|
||||
|
||||
if (rectangle2DList.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
@ -98,49 +100,87 @@ public class RectangleTransformations {
|
||||
previousRectangle = currentRectangle;
|
||||
}
|
||||
}
|
||||
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangleUnion).toList();
|
||||
return rectangleListsWithGaps.stream().map(RectangleTransformations::rectangle2DBBox).toList();
|
||||
}
|
||||
|
||||
|
||||
private static class Rectangle2DUnion implements Collector<Rectangle2D, Area, Rectangle2D> {
|
||||
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
|
||||
|
||||
@Override
|
||||
public Supplier<Area> supplier() {
|
||||
public Supplier<BBox> supplier() {
|
||||
|
||||
return Area::new;
|
||||
return BBox::new;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BiConsumer<Area, Rectangle2D> accumulator() {
|
||||
public BiConsumer<BBox, Rectangle2D> accumulator() {
|
||||
|
||||
return (area, rectangle2D) -> area.add(new Area(rectangle2D));
|
||||
return (bb, rect) -> bb.addRectangle(rect.getMinX(), rect.getMinY(), rect.getMaxX(), rect.getMaxY());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BinaryOperator<Area> combiner() {
|
||||
public BinaryOperator<BBox> combiner() {
|
||||
|
||||
return (area1, area2) -> {
|
||||
area1.add(area2);
|
||||
return area1;
|
||||
};
|
||||
return (b1, b2) -> new BBox(Math.min(b1.lowerLeftX, b2.lowerLeftX),
|
||||
Math.min(b1.lowerLeftY, b2.lowerLeftY),
|
||||
Math.max(b1.upperRightX, b2.upperRightX),
|
||||
Math.max(b1.upperRightY, b2.upperRightY));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Function<Area, Rectangle2D> finisher() {
|
||||
public Function<BBox, Rectangle2D> finisher() {
|
||||
|
||||
return Area::getBounds2D;
|
||||
return bb -> new Rectangle2D.Double(bb.lowerLeftX, bb.lowerLeftY, bb.upperRightX - bb.lowerLeftX, bb.upperRightY - bb.lowerLeftY);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Set<Characteristics> characteristics() {
|
||||
|
||||
return Set.of(Characteristics.CONCURRENT, Characteristics.UNORDERED);
|
||||
return Set.of(Characteristics.UNORDERED);
|
||||
}
|
||||
|
||||
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
private static class BBox {
|
||||
|
||||
Double lowerLeftX;
|
||||
Double lowerLeftY;
|
||||
Double upperRightX;
|
||||
Double upperRightY;
|
||||
|
||||
|
||||
public void addRectangle(double lowerLeftX, double lowerLeftY, double upperRightX, double upperRightY) {
|
||||
|
||||
if (this.lowerLeftX == null) {
|
||||
this.lowerLeftX = lowerLeftX;
|
||||
} else if (this.lowerLeftX > lowerLeftX) {
|
||||
this.lowerLeftX = lowerLeftX;
|
||||
}
|
||||
if (this.lowerLeftY == null) {
|
||||
this.lowerLeftY = lowerLeftY;
|
||||
} else if (this.lowerLeftY > lowerLeftY) {
|
||||
this.lowerLeftY = lowerLeftY;
|
||||
}
|
||||
if (this.upperRightX == null) {
|
||||
this.upperRightX = upperRightX;
|
||||
} else if (this.upperRightX < upperRightX) {
|
||||
this.upperRightX = upperRightX;
|
||||
}
|
||||
if (this.upperRightY == null) {
|
||||
this.upperRightY = upperRightY;
|
||||
} else if (this.upperRightY < upperRightY) {
|
||||
this.upperRightY = upperRightY;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -35,12 +35,16 @@ public class BuildDocumentGraphTest extends BaseTest {
|
||||
@SneakyThrows
|
||||
protected Document buildGraph(String filename) {
|
||||
|
||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
||||
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
} else {
|
||||
prepareStorage(filename + ".pdf");
|
||||
if (!filename.endsWith(".pdf")) {
|
||||
filename = filename + ".pdf";
|
||||
}
|
||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
||||
|
||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
||||
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
} else {
|
||||
prepareStorage(filename);
|
||||
}
|
||||
ClassPathResource fileResource = new ClassPathResource(filename);
|
||||
|
||||
try (InputStream inputStream = fileResource.getInputStream()) {
|
||||
PDDocument pdDocument = Loader.loadPDF(inputStream);
|
||||
|
||||
@ -1,5 +1,45 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
public class DocumentDataTests {
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentDataTests extends BuildDocumentGraphTest{
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void createDocumentDataForAllFiles() {
|
||||
|
||||
String outPath = "/tmp/document_data_output_layoutparser";
|
||||
|
||||
ClassPathResource resource = new ClassPathResource("files");
|
||||
List<String> pdfFileNames = Files.walk(resource.getFile().toPath())
|
||||
.filter(path -> path.getFileName().toString().endsWith(".pdf"))
|
||||
.map(Path::toAbsolutePath)
|
||||
.map(Path::toString)
|
||||
.toList();
|
||||
System.out.printf("%d Files found%n", pdfFileNames.size());
|
||||
for (int i = 0; i < pdfFileNames.size(); i++) {
|
||||
System.out.printf("%d/%d: %s%n", i, pdfFileNames.size(), pdfFileNames.get(i));
|
||||
}
|
||||
for (String pdfFileName : pdfFileNames) {
|
||||
System.out.println(pdfFileName);
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(buildGraph(resource.getFile().toPath().getParent().relativize(Path.of(pdfFileName)).toString()));
|
||||
File outputFile = Path.of(outPath).resolve(resource.getFile().toPath().relativize(Path.of(pdfFileName))).toFile();
|
||||
outputFile.toPath().getParent().toFile().mkdirs();
|
||||
try (var out = new FileOutputStream(outputFile.toString().replace(".pdf", ".json"))) {
|
||||
ObjectMapperFactory.create().writeValue(out, documentData);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,7 +20,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
|
||||
@Disabled
|
||||
public void writeJsonForFileTest() {
|
||||
|
||||
writeJsons("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
writeJsons("files/216");
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user