RED-9760: change compareDouble to something sensible
This commit is contained in:
parent
173911b840
commit
8e115dcd8a
@ -9,6 +9,7 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
@ -118,18 +119,14 @@ public class LayoutParsingPipeline {
|
||||
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
||||
|
||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
|
||||
.orElse(originFile);
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
||||
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
|
||||
.map(layoutParsingStorageService::getVisualLayoutParsingFile)
|
||||
.orElse(new VisualLayoutParsingResponse());
|
||||
.map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse());
|
||||
ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
|
||||
.map(layoutParsingStorageService::getImagesFile)
|
||||
.orElse(new ImageServiceResponse());
|
||||
.map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse());
|
||||
TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
|
||||
.map(layoutParsingStorageService::getTablesFile)
|
||||
.orElse(new TableServiceResponse());
|
||||
.map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse());
|
||||
|
||||
ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
|
||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
|
||||
@ -278,7 +275,9 @@ public class LayoutParsingPipeline {
|
||||
stripper.getText(originDocument);
|
||||
List<TextPositionSequence> words = stripper.getTextPositionSequences();
|
||||
if (layoutParsingType.equals(LayoutParsingType.DOCUMINE_OLD)) {
|
||||
words = TextPositionOperations.sort(words);
|
||||
var lines = TextPositionOperations.groupByLine(new HashSet<>(words));
|
||||
classificationDocument.getLayoutDebugLayer().addLineVisualizationsFromNestedTextPosition(lines, pageNumber);
|
||||
words = TextPositionOperations.sortLines(lines);
|
||||
}
|
||||
classificationDocument.getLayoutDebugLayer().addTextVisualizations(words, pageNumber);
|
||||
|
||||
|
||||
@ -7,9 +7,12 @@ public class DoubleUtils {
|
||||
if (Double.isNaN(d1) || Double.isNaN(d2)) {
|
||||
return Double.compare(d1, d2);
|
||||
}
|
||||
long i1 = Math.round(d1 / (precision == 0 ? 1 : precision));
|
||||
long i2 = Math.round(d2 / (precision == 0 ? 1 : precision));
|
||||
return Long.compare(i1, i2);
|
||||
|
||||
if (Math.abs(d1 - d2) < precision) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Double.compare(d1, d2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ public class TextPositionOperations {
|
||||
public static final double MAX_LINE_HEIGHT_FACTOR = 0.66; // multiplied with max word height
|
||||
public static final double MAX_WORD_DISTANCE_FACTOR = 3.5; // multiplied with max word width
|
||||
|
||||
private static final double Y_THRESHOLD = 6;
|
||||
private static final double Y_THRESHOLD = 5;
|
||||
private static final double X_THRESHOLD = 3;
|
||||
private static final Comparator<TextBoundingBox> COMPARATOR_DIR_ADJ = //
|
||||
Comparator.comparing(TextBoundingBox::getDir)
|
||||
@ -53,13 +53,19 @@ public class TextPositionOperations {
|
||||
|
||||
private List<TextPositionSequence> sortUsingLineDetection(Set<TextPositionSequence> sequences) {
|
||||
|
||||
return groupByLine(sequences).stream()
|
||||
return sortLines(groupByLine(sequences));
|
||||
|
||||
}
|
||||
|
||||
|
||||
public List<TextPositionSequence> sortLines(Collection<Set<TextPositionSequence>> lines) {
|
||||
|
||||
return lines.stream()
|
||||
.map(TextPositionOperations::sortByXDirAdj)
|
||||
.filter(line -> !line.isEmpty())
|
||||
.sorted(Comparator.comparing(line -> line.get(0), COMPARATOR_DIR_ADJ))
|
||||
.flatMap(Collection::stream)
|
||||
.toList();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -71,7 +77,7 @@ public class TextPositionOperations {
|
||||
}
|
||||
|
||||
|
||||
private Collection<Set<TextPositionSequence>> groupByLine(Set<TextPositionSequence> sequences) {
|
||||
public Collection<Set<TextPositionSequence>> groupByLine(Set<TextPositionSequence> sequences) {
|
||||
|
||||
double maxLineDistance = sequences.stream()
|
||||
.map(TextPositionSequence::getBBoxDirAdj)
|
||||
|
||||
@ -6,6 +6,7 @@ import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
@ -150,6 +151,23 @@ public class LayoutDebugLayer extends LayoutDebugLayerConfig {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void addLineVisualizationsFromNestedTextPosition(Collection<Set<TextPositionSequence>> lines, int pageNumber) {
|
||||
|
||||
if (!active) {
|
||||
return;
|
||||
}
|
||||
VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, this.lines);
|
||||
visualizationsOnPage.getColoredRectangles()
|
||||
.addAll(lines.stream()
|
||||
.map(line -> line.stream()
|
||||
.map(BoundingBox::getBBoxPdf)
|
||||
.collect(RectangleTransformations.collectBBox()))
|
||||
.map(line -> new ColoredRectangle(line, LINES_COLOR, 0.5f))
|
||||
.toList());
|
||||
|
||||
}
|
||||
|
||||
public void addTextBlockVisualizations(List<TextPageBlock> textPageBlocks, int page) {
|
||||
|
||||
if (!active) {
|
||||
@ -235,4 +253,5 @@ public class LayoutDebugLayer extends LayoutDebugLayerConfig {
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user