Outputs are now almost equal to those of the current redaction-service with regard to RedactManager
* 3/200 files have minimal whitespace/sorting differences, most likely due to rounding errors
This commit is contained in:
parent
a41c13fdd6
commit
270129cd73
@ -102,6 +102,7 @@ public class LayoutParsingPipeline {
|
||||
}
|
||||
|
||||
sectionsBuilderService.buildSections(classificationDocument);
|
||||
sectionsBuilderService.addImagesToSections(classificationDocument);
|
||||
|
||||
return DocumentGraphFactory.buildDocumentGraph(classificationDocument);
|
||||
}
|
||||
|
||||
@ -30,14 +30,18 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public Ruling straightenVertical() {
|
||||
|
||||
double y1 = Math.min(getY1(), getY2());
|
||||
double y2 = Math.max(getY1(), getY2());
|
||||
double x = (getX1() + getX2()) / 2;
|
||||
return new Ruling(new Point2D.Double(x, getY1()), new Point2D.Double(x, getY2()));
|
||||
return new Ruling(new Point2D.Double(x, y1), new Point2D.Double(x, y2));
|
||||
}
|
||||
|
||||
public Ruling straightenHorizonatl() {
|
||||
public Ruling straightenHorizontal() {
|
||||
|
||||
double x1 = Math.min(getX1(), getX2());
|
||||
double x2 = Math.max(getX1(), getX2());
|
||||
double y = (getY1() + getY2()) / 2;
|
||||
return new Ruling(new Point2D.Double(getX1(), y), new Point2D.Double(getX2(), y));
|
||||
return new Ruling(new Point2D.Double(x1, y), new Point2D.Double(x2, y));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -108,7 +108,7 @@ public class PdfParsingService {
|
||||
imageServiceResponseAdapter.findOcr(classificationPage);
|
||||
}
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
||||
tableExtractionService.extractTables(cleanRulings, classificationPage, layoutParsingType);
|
||||
buildPageStatistics(classificationPage);
|
||||
increaseDocumentStatistics(classificationPage, document);
|
||||
|
||||
|
||||
@ -53,7 +53,10 @@ public class RulingCleaningService {
|
||||
}
|
||||
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
||||
|
||||
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
|
||||
return CleanRulings.builder()
|
||||
.vertical(verticalRulingLines)
|
||||
.horizontal(horizontalRulingLines)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.Set;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
@ -78,9 +79,9 @@ public class TableExtractionService {
|
||||
* @param cleanRulings The lines used to build the table.
|
||||
* @param page Page object that contains textblocks and statistics.
|
||||
*/
|
||||
public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {
|
||||
public void extractTables(CleanRulings cleanRulings, ClassificationPage page, LayoutParsingType layoutParsingType) {
|
||||
|
||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical(), layoutParsingType);
|
||||
|
||||
List<TextPageBlock> toBeRemoved = new ArrayList<>();
|
||||
|
||||
@ -134,13 +135,16 @@ public class TableExtractionService {
|
||||
}
|
||||
|
||||
|
||||
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines, LayoutParsingType layoutParsingType) {
|
||||
|
||||
for (Ruling r : horizontalRulingLines) {
|
||||
if (r.getX2() < r.getX1()) {
|
||||
double a = r.getX2();
|
||||
r.x2 = (float) r.getX1();
|
||||
r.x1 = (float) a;
|
||||
if (layoutParsingType.equals(LayoutParsingType.TAAS)) {
|
||||
// TODO: breaks some tables, e.g. "1 Abamectin Prr.pdf"; try to fix this upstream in RulingCleaningService
|
||||
for (Ruling r : horizontalRulingLines) {
|
||||
if (r.getX2() < r.getX1()) {
|
||||
double a = r.getX2();
|
||||
r.x2 = (float) r.getX1();
|
||||
r.x1 = (float) a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user