From 5792ff4a9357180f72b046cd1db3939f7d2431dc Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Tue, 5 Sep 2023 16:54:23 +0200 Subject: [PATCH] TAAS-104: merge visually intersecting Paragraphs * fix build --- .../processor/model/AbstractPageBlock.java | 5 +- .../TaasBlockificationService.java | 6 +- .../utils/TextPageBlockComparator.java | 64 ------------------- 3 files changed, 8 insertions(+), 67 deletions(-) delete mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPageBlockComparator.java diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java index 91ce8f1..16a468e 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/AbstractPageBlock.java @@ -71,13 +71,13 @@ public abstract class AbstractPageBlock { return maxX - minX; } - public abstract boolean isEmpty(); public boolean intersectsY(AbstractPageBlock apb) { return this.minY <= apb.getMaxY() && this.maxY >= apb.getMinY(); } + public boolean almostIntersects(AbstractPageBlock apb, float yThreshold, float xThreshold) { return this.almostIntersectsX(apb, xThreshold) && this.almostIntersectsY(apb, yThreshold); @@ -95,4 +95,7 @@ public abstract class AbstractPageBlock { return this.minX - threshold <= apb.getMaxX() && this.maxX + threshold >= apb.getMinX(); } + + public abstract boolean isEmpty(); + } diff --git 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java index 428d529..f16198d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/TaasBlockificationService.java @@ -1,6 +1,8 @@ package com.knecon.fforesight.service.layoutparser.processor.services.blockification; -import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING; + +// TODO: figure out why this fails the build +// import static com.knecon.fforesight.service.layoutparser.processor.services.factory.SearchTextWithTextPositionFactory.HEIGHT_PADDING; import java.util.ArrayList; import java.util.HashSet; @@ -28,7 +30,7 @@ public class TaasBlockificationService { private static final float THRESHOLD = 1f; private static final float Y_GAP_SPLIT_HEIGHT_MODIFIER = 1.25f; // multiplied with text height - private static final float INTERSECTS_Y_THRESHOLD = 2 * HEIGHT_PADDING; // This is exactly 2 times our position height padding. This is required to find boxes that are visually intersecting. + private static final float INTERSECTS_Y_THRESHOLD = 4; // Hard-coded stand-in for 2 * HEIGHT_PADDING while the static import is disabled. This is meant to be exactly 2 times our position height padding. This is required to find boxes that are visually intersecting. 
private static final int X_GAP_SPLIT_CONSTANT = 50; public static final int X_ALIGNMENT_THRESHOLD = 1; public static final int SMALL_Y_GAP_THRESHOLD = 5; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPageBlockComparator.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPageBlockComparator.java deleted file mode 100644 index 56afc16..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/TextPageBlockComparator.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.knecon.fforesight.service.layoutparser.processor.utils; - -import java.util.Comparator; - -import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; - -public class TextPageBlockComparator implements Comparator<TextPageBlock> -{ - @Override - public int compare(TextPageBlock pos1, TextPageBlock pos2) - { - // only compare text that is in the same direction - int cmp1 = Float.compare(pos1.getDir().getDegrees(), pos2.getDir().getDegrees()); - if (cmp1 != 0) - { - return cmp1; - } - - // get the text direction adjusted coordinates - float x1 = pos1.getMinX(); - float x2 = pos2.getMinX(); - - float pos1YBottom = pos1.getMaxY(); - float pos2YBottom = pos2.getMaxY(); - - // note that the coordinates have been adjusted so 0,0 is in upper left - float pos1YTop = pos1YBottom - pos1.getHeight(); - float pos2YTop = pos2YBottom - pos2.getHeight(); - - float yDifference = Math.abs(pos1YBottom - pos2YBottom); - - // we will do a simple tolerance comparison - if (yDifference < .1 || - pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || - pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) - { - return Float.compare(x1, x2); - } - else if (pos1YBottom < pos2YBottom) - { - return -1; - } - else - { - return 1; - } - } -}