From c90eee23c4f9f1fbc319cedb3c2aa8ce63a72101 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 3 Dec 2020 15:03:55 +0100 Subject: [PATCH] Fixed duplicate Textblock in Tables --- .../service/TableExtractionService.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java index 69c2fe69..3dddd34a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java @@ -30,12 +30,14 @@ public class TableExtractionService { List cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical()); + List toBeRemoved = new ArrayList<>(); + for (AbstractTextContainer abstractTextContainer : page.getTextBlocks()) { TextBlock textBlock = (TextBlock) abstractTextContainer; for (Cell cell : cells) { - if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), - textBlock.getHeight())) { + if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight())) { cell.addTextBlock(textBlock); + toBeRemoved.add(textBlock); break; } } @@ -44,7 +46,6 @@ public class TableExtractionService { cells = new ArrayList<>(new HashSet<>(cells)); Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER); - List spreadsheetAreas = findSpreadsheetsFromCells(cells).stream() .filter(r -> r.getWidth() > 0f && r.getHeight() > 0f) .collect(Collectors.toList()); @@ -67,25 +68,23 @@ public class TableExtractionService { Iterator itty = page.getTextBlocks().iterator(); while (itty.hasNext()) { AbstractTextContainer textBlock = itty.next(); - if (table.contains(textBlock)) { - if (position == -1) { - position = page.getTextBlocks().indexOf(textBlock); - } - itty.remove(); + if (table.contains(textBlock) && position == -1) { + position = page.getTextBlocks().indexOf(textBlock); } } if (position != -1) { page.getTextBlocks().add(position, table); } } + + page.getTextBlocks().removeAll(toBeRemoved); } public List findCells(List horizontalRulingLines, List verticalRulingLines) { List cellsFound = new ArrayList<>(); - Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, - verticalRulingLines); + Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); intersectionPointsList.sort(POINT_COMPARATOR);