Outputs are almost equal to those of the current redaction-service with regard to RedactManager

* 3/200 files have minimal whitespace/sorting errors, most likely caused by rounding errors
This commit is contained in:
Kilian Schuettler 2023-07-25 18:12:57 +02:00
parent a41c13fdd6
commit 270129cd73
5 changed files with 25 additions and 13 deletions

View File

@ -102,6 +102,7 @@ public class LayoutParsingPipeline {
}
sectionsBuilderService.buildSections(classificationDocument);
sectionsBuilderService.addImagesToSections(classificationDocument);
return DocumentGraphFactory.buildDocumentGraph(classificationDocument);
}

View File

@ -30,14 +30,18 @@ public class Ruling extends Line2D.Float {
public Ruling straightenVertical() {
double y1 = Math.min(getY1(), getY2());
double y2 = Math.max(getY1(), getY2());
double x = (getX1() + getX2()) / 2;
return new Ruling(new Point2D.Double(x, getY1()), new Point2D.Double(x, getY2()));
return new Ruling(new Point2D.Double(x, y1), new Point2D.Double(x, y2));
}
public Ruling straightenHorizonatl() {
public Ruling straightenHorizontal() {
double x1 = Math.min(getX1(), getX2());
double x2 = Math.max(getX1(), getX2());
double y = (getY1() + getY2()) / 2;
return new Ruling(new Point2D.Double(getX1(), y), new Point2D.Double(getX2(), y));
return new Ruling(new Point2D.Double(x1, y), new Point2D.Double(x2, y));
}

View File

@ -108,7 +108,7 @@ public class PdfParsingService {
imageServiceResponseAdapter.findOcr(classificationPage);
}
tableExtractionService.extractTables(cleanRulings, classificationPage);
tableExtractionService.extractTables(cleanRulings, classificationPage, layoutParsingType);
buildPageStatistics(classificationPage);
increaseDocumentStatistics(classificationPage, document);

View File

@ -53,7 +53,10 @@ public class RulingCleaningService {
}
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
return CleanRulings.builder()
.vertical(verticalRulingLines)
.horizontal(horizontalRulingLines)
.build();
}

View File

@ -12,6 +12,7 @@ import java.util.Set;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
@ -78,9 +79,9 @@ public class TableExtractionService {
* @param cleanRulings The lines used to build the table.
* @param page Page object that contains textblocks and statistics.
*/
public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {
public void extractTables(CleanRulings cleanRulings, ClassificationPage page, LayoutParsingType layoutParsingType) {
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical(), layoutParsingType);
List<TextPageBlock> toBeRemoved = new ArrayList<>();
@ -134,13 +135,16 @@ public class TableExtractionService {
}
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines, LayoutParsingType layoutParsingType) {
for (Ruling r : horizontalRulingLines) {
if (r.getX2() < r.getX1()) {
double a = r.getX2();
r.x2 = (float) r.getX1();
r.x1 = (float) a;
if (layoutParsingType.equals(LayoutParsingType.TAAS)) {
// TODO: breaks some tables, for example "1 Abamectin Prr.pdf" try to fix this upstream in RulingCleaningService
for (Ruling r : horizontalRulingLines) {
if (r.getX2() < r.getX1()) {
double a = r.getX2();
r.x2 = (float) r.getX1();
r.x1 = (float) a;
}
}
}