diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index b3cdb6b..81aa0de 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -29,13 +29,13 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; +import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse; 
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService; @@ -219,7 +219,7 @@ public class LayoutParsingPipeline { PDDocument originDocument = openDocument(originFile); addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath())); - Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); + Map> pdfTableRulings = cvTableParsingAdapter.buildCvParsedRulingsPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); Map> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse); ClassificationDocument classificationDocument = new ClassificationDocument(); @@ -258,7 +258,7 @@ public class LayoutParsingPipeline { boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270); PDRectangle cropbox = pdPage.getCropBox(); - CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings()); + CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableRulings.get(pageNumber), stripper.getRulings()); ClassificationPage classificationPage = switch (layoutParsingType) { case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical()); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java index 471db6a..88dd19c 100644 --- 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java @@ -36,6 +36,7 @@ public class LayoutParsingStorageService { private final StorageService storageService; private final ObjectMapper objectMapper; + @Observed(name = "LayoutParsingStorageService", contextualName = "get-origin-file") public File getOriginFile(String storageId) throws IOException { @@ -61,28 +62,25 @@ public class LayoutParsingStorageService { try (InputStream inputStream = getObject(storageId)) { - ImageServiceResponse imageServiceResponse = objectMapper.readValue(inputStream, ImageServiceResponse.class); - inputStream.close(); - return imageServiceResponse; + return objectMapper.readValue(inputStream, ImageServiceResponse.class); } } public TableServiceResponse getTablesFile(String storageId) throws IOException { - try (var tableClassificationStream = getObject(storageId)) { + try (InputStream tableClassificationStream = getObject(storageId)) { - TableServiceResponse tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class); - tableClassificationStream.close(); - return tableServiceResponse; + return objectMapper.readValue(tableClassificationStream, TableServiceResponse.class); } } + public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException { try (InputStream inputStream = getObject(storageId)) { - VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class); - return visualLayoutParsingResponse; + + return objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class); } } diff --git 
a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java index 287831e..77c1681 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/CvTableParsingAdapter.java @@ -1,15 +1,19 @@ package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter; +import java.awt.geom.Point2D; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.springframework.stereotype.Service; +import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.PageInfo; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCell; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableLine; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import lombok.RequiredArgsConstructor; @@ -20,33 +24,24 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class CvTableParsingAdapter { - public Map> buildCvParsedTablesPerPage(TableServiceResponse tableServiceResponse) { + public Map> buildCvParsedRulingsPerPage(TableServiceResponse tableServiceResponse) { - Map> tableCells = new HashMap<>(); + Map> rulings = new 
HashMap<>(); tableServiceResponse.getData() - .forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>()) - .addAll(convertTableCells(tableData.getTableCells(), tableData.getPageInfo()))); + .forEach(tableData -> rulings.computeIfAbsent(tableData.getPageInfo().getNumber(), ruling -> new ArrayList<>()) + .addAll(convertTableLines(tableData.getTableLines(), tableData.getPageInfo()))); - return tableCells; + return rulings; } - private Collection convertTableCells(List tableCells, PageInfo pageInfo) { + private Collection convertTableLines(List tableLines, PageInfo pageInfo) { - List cvParsedTableCells = new ArrayList<>(); + List cvParsedRulings = new ArrayList<>(); - tableCells.stream() - .filter(cell -> cell.getWidth() < pageInfo.getWidth() * 0.98 && cell.getHeight() < pageInfo.getHeight() * 0.98) - .forEach(t -> cvParsedTableCells.add(TableCells.builder() - .y0(t.getY0()) - .x1(t.getX1()) - .y1(t.getY1()) - .x0(t.getX0()) - .width(t.getWidth()) - .height(t.getHeight()) - .build())); + tableLines.forEach(l -> cvParsedRulings.add(new Ruling(new Point2D.Double(l.getX0(), l.getY0()), new Point2D.Double(l.getX1(), l.getY1())))); - return cvParsedTableCells; + return cvParsedRulings.stream().filter(ruling -> ruling.getWidth() < pageInfo.getWidth() * 0.98 && ruling.getHeight() < pageInfo.getHeight() * 0.98).toList(); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCells.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCell.java similarity index 93% rename from layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCells.java rename to 
layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCell.java index 861b7e7..7bf8716 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCells.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableCell.java @@ -9,7 +9,7 @@ import lombok.NoArgsConstructor; @Builder @NoArgsConstructor @AllArgsConstructor -public class TableCells { +public class TableCell { private float x0; private float y0; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableData.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableData.java index be1d492..7a793d1 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableData.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableData.java @@ -14,7 +14,7 @@ import lombok.NoArgsConstructor; @AllArgsConstructor public class TableData { private PageInfo pageInfo; - private List tableCells = new ArrayList<>(); + private List tableLines = new ArrayList<>(); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableLine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableLine.java new file mode 100644 index
0000000..826b8d1 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/model/table/TableLine.java @@ -0,0 +1,19 @@ +package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableLine { + + private float x0; + private float y0; + private float x1; + private float y1; + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java index c51c90b..4095e90 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/RulingCleaningService.java @@ -14,7 +14,6 @@ import org.springframework.stereotype.Service; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle; import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.utils.UnionFind; import lombok.RequiredArgsConstructor; @@ -31,14 +30,15 @@ public class RulingCleaningService { private static final float THRESHOLD_Y_HORIZONTAL = 3; - public CleanRulings getCleanRulings(List tableCells, List rulings) { + public CleanRulings 
getCleanRulings(List parsedRulings, List rulings) { Rulings verticalAndHorizontalRulingLines; + // TODO 8642: decide whether CV-parsed rulings are used only when the PDF yields no rulings (current behaviour) or are always merged in. NOTE(review): parsedRulings may be null for pages without CV table data (the removed cell-based helpers null-checked it); confirm extractVerticalAndHorizontalRulingLines tolerates null. if (!rulings.isEmpty()) { verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(rulings); } else { - verticalAndHorizontalRulingLines = getRulingsFromParsedCells(tableCells); + verticalAndHorizontalRulingLines = extractVerticalAndHorizontalRulingLines(parsedRulings); } verticalAndHorizontalRulingLines.verticalLines.sort(X_FIRST_RULING_COMPARATOR); @@ -175,46 +175,6 @@ public class RulingCleaningService { } - private Rulings getRulingsFromParsedCells(List tableCells) { - - List vrs = extractVerticalRulingsFromParsedCells(tableCells); - List hrs = extractHorizontalRulingsFromParsedCells(tableCells); - return new Rulings(vrs, hrs); - } - - - private List extractVerticalRulingsFromParsedCells(List tableCells) { - - List vrs = new ArrayList<>(); - - if (tableCells != null) { - for (TableCells tableCell : tableCells) { - Ruling leftLine = createRuling(tableCell.getX0(), tableCell.getX0(), tableCell.getY0(), tableCell.getY1()); - Ruling rightLine = createRuling(tableCell.getX1(), tableCell.getX1(), tableCell.getY0(), tableCell.getY1()); - vrs.add(leftLine); - vrs.add(rightLine); - } - } - return vrs; - } - - - private List extractHorizontalRulingsFromParsedCells(List tableCells) { - - List hrs = new ArrayList<>(); - - if (tableCells != null) { - for (TableCells tableCell : tableCells) { - Ruling topLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY1(), tableCell.getY1()); - Ruling baseLine = createRuling(tableCell.getX0(), tableCell.getX1(), tableCell.getY0(), tableCell.getY0()); - hrs.add(topLine); - hrs.add(baseLine); - } - } - return hrs; - } - - private Ruling createRuling(float tableCellX0, float tableCellX1, float tableCellY0, float tableCellY1) { float x0 = tableCellX0;