TAAS-103: Fixed values in wrong cells

This commit is contained in:
Dominique Eifländer 2023-11-15 13:36:46 +01:00
parent c3e69b2cdf
commit a6ba66b1aa
9 changed files with 222 additions and 212 deletions

View File

@ -26,7 +26,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
@ -188,17 +187,7 @@ public class LayoutParsingPipeline {
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270); boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
PDRectangle cropbox = pdPage.getCropBox(); PDRectangle cropbox = pdPage.getCropBox();
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings());
stripper.getRulings(),
stripper.getMinCharWidth(),
stripper.getMaxCharHeight());
List<Rectangle> spreedSheetArea = tableExtractionService.getSpreadSheetArea(cleanRulings, layoutParsingType);
Map<String,Float> newValues = calculateMinCharWidthAndMaxCharHeightInsideTable(stripper,spreedSheetArea);
cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings(), newValues.get("minCharWidth"), newValues.get("minCharHeigth"));
ClassificationPage classificationPage = switch (layoutParsingType) { ClassificationPage classificationPage = switch (layoutParsingType) {
case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical()); case REDACT_MANAGER -> redactManagerBlockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
@ -221,7 +210,8 @@ public class LayoutParsingPipeline {
imageServiceResponseAdapter.findOcr(classificationPage); imageServiceResponseAdapter.findOcr(classificationPage);
} }
tableExtractionService.extractTables(cleanRulings, classificationPage, layoutParsingType); tableExtractionService.extractTables(cleanRulings, classificationPage);
buildPageStatistics(classificationPage); buildPageStatistics(classificationPage);
increaseDocumentStatistics(classificationPage, classificationDocument); increaseDocumentStatistics(classificationPage, classificationDocument);
@ -242,43 +232,6 @@ public class LayoutParsingPipeline {
return classificationDocument; return classificationDocument;
} }
/**
 * Finds the smallest character width and the smallest character height among the text
 * positions of the stripper that lie inside at least one of the given table areas.
 * <p>
 * Note that, despite the method name, the height value is a minimum, not a maximum.
 * Both minima start at fixed upper bounds (10 for the width, 30 for the height), so the
 * returned values never exceed these bounds and are returned unchanged when no text
 * position falls into any area.
 *
 * @param stripper the stripper providing the text position sequences to inspect
 * @param spreedSheetArea the table areas; only text positions contained in one of them are considered
 *
 * @return a map holding the smallest width under the key {@code "minCharWidth"} and the
 * smallest height under the key {@code "minCharHeigth"} (the misspelled key is kept
 * because callers look the value up by this exact name)
 */
private Map<String, Float> calculateMinCharWidthAndMaxCharHeightInsideTable(PDFLinesTextStripper stripper, List<Rectangle> spreedSheetArea) {

    // Upper bounds used as starting values for the two minima.
    float newMinCharWidth = 10;
    float newMinCharHeight = 30;

    for (var textPositionSequence : stripper.getTextPositionSequences()) {
        for (var redTextPosition : textPositionSequence.getTextPositions()) {
            // The position does not depend on the area, so read it once per character.
            var position = redTextPosition.getPosition();
            for (var area : spreedSheetArea) {
                if (!area.contains(position[0], position[1], position[2], position[3])) {
                    continue;
                }
                if (redTextPosition.getHeightDir() < newMinCharHeight) {
                    newMinCharHeight = redTextPosition.getHeightDir();
                }
                if (redTextPosition.getWidthDirAdj() < newMinCharWidth) {
                    newMinCharWidth = redTextPosition.getWidthDirAdj();
                }
            }
        }
    }

    Map<String, Float> result = new HashMap<>();
    result.put("minCharWidth", newMinCharWidth);
    result.put("minCharHeigth", newMinCharHeight);
    return result;
}
private Map<String, List<Rectangle2D>> convertMarkedContents(List<PDMarkedContent> pdMarkedContents) { private Map<String, List<Rectangle2D>> convertMarkedContents(List<PDMarkedContent> pdMarkedContents) {
@ -291,8 +244,8 @@ public class LayoutParsingPipeline {
private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) { private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) {
// if (!classificationPage.isLandscape()) { // if (!classificationPage.isLandscape()) {
document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue()); document.getFontSizeCounter().addAll(classificationPage.getFontSizeCounter().getCountPerValue());
// } // }
document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue()); document.getFontCounter().addAll(classificationPage.getFontCounter().getCountPerValue());
document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue()); document.getTextHeightCounter().addAll(classificationPage.getTextHeightCounter().getCountPerValue());

View File

@ -1,12 +1,14 @@
package com.knecon.fforesight.service.layoutparser.processor.model.table; package com.knecon.fforesight.service.layoutparser.processor.model.table;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.stream.Collectors;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType; import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
@ -252,7 +254,8 @@ public class TablePageBlock extends AbstractPageBlock {
if (prevY != null && prevX != null) { if (prevY != null && prevX != null) {
var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y)); var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
var intersectionCell = cells.stream().filter(c -> cell.intersects(c)).findFirst(); var intersectionCell = cells.stream().filter(c -> intersects(cell, c)).findFirst();
intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks())); intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks()));
if (cell.hasMinimumSize()) { if (cell.hasMinimumSize()) {
row.add(cell); row.add(cell);
@ -273,6 +276,21 @@ public class TablePageBlock extends AbstractPageBlock {
} }
/**
 * Checks whether {@code cell2} overlaps {@code cell1}, tolerating a small overlap at the
 * lower x/y edges of {@code cell1}.
 * <p>
 * The comparison window starts 2 units inside {@code cell1} on the x and y axis, so a
 * {@code cell2} that only reaches up to 2 units into that side of {@code cell1} is not
 * reported as intersecting.
 *
 * @param cell1 the cell whose (inset) area is tested
 * @param cell2 the candidate cell
 *
 * @return {@code true} if the cells overlap as described; {@code false} otherwise, and
 * always {@code false} if either cell has a non-positive width or height
 */
public boolean intersects(Cell cell1, Cell cell2) {

    // Degenerate cells never intersect; both dimensions of both cells must be positive.
    if (cell1.getWidth() <= 0 || cell1.getHeight() <= 0 || cell2.getWidth() <= 0 || cell2.getHeight() <= 0) {
        return false;
    }

    double x0 = cell1.getX() + 2;
    double y0 = cell1.getY() + 2;

    // NOTE(review): x0 + width - 2 equals cell1.getX() + width (same for y), so only the
    // near edges are inset by 2 while the far edges are not. Confirm whether a symmetric
    // inset was intended before changing this.
    return (cell2.x + cell2.width > x0 &&
            cell2.y + cell2.height > y0 &&
            cell2.x < x0 + cell1.getWidth() - 2 &&
            cell2.y < y0 + cell1.getHeight() - 2);
}
@Override @Override
public String getText() { public String getText() {

View File

@ -12,9 +12,9 @@ import java.util.Map;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling; import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons; import com.knecon.fforesight.service.layoutparser.processor.utils.DoubleComparisons;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@ -25,10 +25,13 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor @RequiredArgsConstructor
public class RulingCleaningService { public class RulingCleaningService {
public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings, float minCharWidth, float maxCharHeight) { private static final float THRESHOLD = 6;
public CleanRulings getCleanRulings(List<TableCells> tableCells, List<Ruling> rulings) {
if (!rulings.isEmpty()) { if (!rulings.isEmpty()) {
snapPoints(rulings, minCharWidth, maxCharHeight); snapPoints(rulings);
} }
List<Ruling> vrs = new ArrayList<>(); List<Ruling> vrs = new ArrayList<>();
@ -53,14 +56,11 @@ public class RulingCleaningService {
} }
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs); List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
return CleanRulings.builder() return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
.vertical(verticalRulingLines)
.horizontal(horizontalRulingLines)
.build();
} }
public void snapPoints(List<? extends Line2D.Float> rulings, float xThreshold, float yThreshold) { public void snapPoints(List<? extends Line2D.Float> rulings) {
// collect points and keep a Line -> p1,p2 map // collect points and keep a Line -> p1,p2 map
Map<Line2D.Float, Point2D[]> linesToPoints = new HashMap<>(); Map<Line2D.Float, Point2D[]> linesToPoints = new HashMap<>();
@ -81,7 +81,7 @@ public class RulingCleaningService {
for (Point2D p : points.subList(1, points.size() - 1)) { for (Point2D p : points.subList(1, points.size() - 1)) {
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1); List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) { if (Math.abs(p.getX() - last.get(0).getX()) < THRESHOLD) {
groupedPoints.get(groupedPoints.size() - 1).add(p); groupedPoints.get(groupedPoints.size() - 1).add(p);
} else { } else {
groupedPoints.add(new ArrayList<>(Collections.singletonList(p))); groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
@ -108,7 +108,7 @@ public class RulingCleaningService {
for (Point2D p : points.subList(1, points.size() - 1)) { for (Point2D p : points.subList(1, points.size() - 1)) {
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1); List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) { if (Math.abs(p.getY() - last.get(0).getY()) < THRESHOLD) {
groupedPoints.get(groupedPoints.size() - 1).add(p); groupedPoints.get(groupedPoints.size() - 1).add(p);
} else { } else {
groupedPoints.add(new ArrayList<>(Collections.singletonList(p))); groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));

View File

@ -1,9 +1,6 @@
package com.knecon.fforesight.service.layoutparser.processor.services; package com.knecon.fforesight.service.layoutparser.processor.services;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
@ -13,11 +10,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.pdfbox.Loader;
import org.springframework.core.io.ClassPathResource;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell; import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
@ -72,19 +66,16 @@ public class TableExtractionService {
public boolean contains(Cell cell, double x, double y, double w, double h) { public boolean contains(Cell cell, double x, double y, double w, double h) {
if (cell.isEmpty() || w <= 0 || h <= 0) { if (cell.isEmpty() || w <= 0 || h <= 0) {
return false; return false;
} }
double x0 = cell.getX(); double x0 = cell.getX();
double y0 = cell.getY(); double y0 = cell.getY();
return (x >= x0-2 && return (x >= x0 - 2 && y >= y0 - 2 && (x + w) <= x0 + cell.getWidth() + 2 && (y + h) <= y0 + cell.getHeight() + 2);
y >= y0-2 &&
(x + w) <= x0 + cell.getWidth()+2 &&
(y + h) <= y0 + cell.getHeight()+2);
} }
/** /**
* Finds tables on a page and moves textblocks into cells of the found tables. * Finds tables on a page and moves textblocks into cells of the found tables.
* Note: This algorithm uses Pdf Coordinate System where {0,0} rotated with the page rotation. * Note: This algorithm uses Pdf Coordinate System where {0,0} rotated with the page rotation.
@ -98,17 +89,17 @@ public class TableExtractionService {
* @param cleanRulings The lines used to build the table. * @param cleanRulings The lines used to build the table.
* @param page Page object that contains textblocks and statistics. * @param page Page object that contains textblocks and statistics.
*/ */
public void extractTables(CleanRulings cleanRulings, ClassificationPage page, LayoutParsingType layoutParsingType) { public void extractTables(CleanRulings cleanRulings, ClassificationPage page) {
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical(), layoutParsingType);
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
List<TextPageBlock> toBeRemoved = new ArrayList<>(); List<TextPageBlock> toBeRemoved = new ArrayList<>();
for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) { for (AbstractPageBlock abstractPageBlock : page.getTextBlocks()) {
TextPageBlock textBlock = (TextPageBlock) abstractPageBlock; TextPageBlock textBlock = (TextPageBlock) abstractPageBlock;
for (Cell cell : cells) { for (Cell cell : cells) {
if (cell.hasMinimumSize() && contains(cell, textBlock.getPdfMinX(), if (cell.hasMinimumSize() && contains(cell,
textBlock.getPdfMinX(),
textBlock.getPdfMinY(), textBlock.getPdfMinY(),
textBlock.getPdfMaxX() - textBlock.getPdfMinX(), textBlock.getPdfMaxX() - textBlock.getPdfMinX(),
textBlock.getPdfMaxY() - textBlock.getPdfMinY())) { textBlock.getPdfMaxY() - textBlock.getPdfMinY())) {
@ -149,39 +140,20 @@ public class TableExtractionService {
if (position != -1) { if (position != -1) {
page.getTextBlocks().add(position, table); page.getTextBlocks().add(position, table);
} }
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/page1.tables.html";
try (FileOutputStream fileOutputStream = new FileOutputStream(Path.of(tmpFileName).toFile())) {
fileOutputStream.write(table.getTextAsHtml().getBytes());
}
catch (IOException e) {
throw new RuntimeException(e);
}
} }
page.getTextBlocks().removeAll(toBeRemoved); page.getTextBlocks().removeAll(toBeRemoved);
}
public List<Rectangle> getSpreadSheetArea(CleanRulings cleanRulings, LayoutParsingType layoutParsingType) {
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical(), layoutParsingType);
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells).stream().filter(r -> r.getWidth() > 0f && r.getHeight() > 0f).toList();
return spreadsheetAreas;
} }
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines, LayoutParsingType layoutParsingType) { public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
if (layoutParsingType.equals(LayoutParsingType.TAAS)) { // Fix for 211.pdf
// TODO: breaks some tables, for example "1 Abamectin Prr.pdf" try to fix this upstream in RulingCleaningService for (Ruling r : horizontalRulingLines) {
for (Ruling r : horizontalRulingLines) { if (r.getX2() < r.getX1()) {
if (r.getX2() < r.getX1()) { double a = r.getX2();
double a = r.getX2(); r.x2 = (float) r.getX1();
r.x2 = (float) r.getX1(); r.x1 = (float) a;
r.x1 = (float) a;
}
} }
} }

View File

@ -1,18 +1,34 @@
package com.knecon.fforesight.service.layoutparser.processor.services.parsing; package com.knecon.fforesight.service.layoutparser.processor.services.parsing;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling; import java.awt.color.CMMException;
import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition; import java.awt.geom.Point2D;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence; import java.io.IOException;
import lombok.Getter; import java.util.ArrayList;
import lombok.Setter; import java.util.List;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.*; import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.markedcontent.BeginMarkedContentSequenceWithProperties; import org.apache.pdfbox.contentstream.operator.markedcontent.BeginMarkedContentSequenceWithProperties;
import org.apache.pdfbox.contentstream.operator.markedcontent.EndMarkedContentSequence; import org.apache.pdfbox.contentstream.operator.markedcontent.EndMarkedContentSequence;
import org.apache.pdfbox.contentstream.operator.state.*; import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize; import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSNumber;
@ -21,11 +37,14 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.text.TextPosition;
import java.awt.color.CMMException; import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
import java.awt.geom.Point2D; import com.knecon.fforesight.service.layoutparser.processor.model.text.RedTextPosition;
import java.io.IOException; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
import java.util.ArrayList;
import java.util.List; import lombok.Getter;
import lombok.Setter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Getter @Getter
@Slf4j @Slf4j
@ -36,11 +55,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
private final List<Ruling> graphicsPath = new ArrayList<>(); private final List<Ruling> graphicsPath = new ArrayList<>();
@Setter @Setter
protected PDPage pdpage; protected PDPage pdpage;
private int minCharWidth;
private int maxCharWidth;
private int minCharHeight;
private int maxCharHeight;
private float path_x; private float path_x;
private float path_y; private float path_y;
@ -73,7 +87,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
this.addOperator(new SetFontAndSize(this)); this.addOperator(new SetFontAndSize(this));
this.addOperator(new SetLineWidth(this)); this.addOperator(new SetLineWidth(this));
addOperator(new BeginMarkedContentSequenceWithProperties(this)); addOperator(new BeginMarkedContentSequenceWithProperties(this));
// addOperator(new BeginMarkedContentSequence(this)); // addOperator(new BeginMarkedContentSequence(this));
addOperator(new EndMarkedContentSequence(this)); addOperator(new EndMarkedContentSequence(this));
@ -232,29 +245,13 @@ public class PDFLinesTextStripper extends PDFTextStripper {
.get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1); .get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1);
} }
int charWidth = (int) textPositions.get(i).getWidthDirAdj();
if (charWidth < minCharWidth) {
minCharWidth = charWidth;
}
if (charWidth > maxCharWidth) {
maxCharWidth = charWidth;
}
int charHeight = (int) textPositions.get(i).getHeightDir();
if (charHeight < minCharHeight) {
minCharHeight = charHeight;
}
if (charWidth > maxCharHeight) {
maxCharHeight = charHeight;
}
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) { if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
startIndex++; startIndex++;
continue; continue;
} }
// Strange but sometimes this is happening, for example: Metolachlor2.pdf // Strange but sometimes this is happening, for example: Metolachlor2.pdf
if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i,textPositions)) { if (checkIfCurrentPositionIsToTheRightOfPreviousPosition(i, textPositions)) {
List<TextPosition> sublist = textPositions.subList(startIndex, i); List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (checkIfSequenceContainsOnlyWhitespaces(sublist)) { if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart)); textPositionSequences.add(new TextPositionSequence(sublist, pageNumber, i == textPositions.size() - 1 && isParagraphStart));
@ -277,7 +274,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
if (checkIfSequenceContainsOnlyWhitespaces(sublist)) { if (checkIfSequenceContainsOnlyWhitespaces(sublist)) {
// Remove false sequence ends (whitespaces) // Remove false sequence ends (whitespaces)
if (checkIfGapSizeBetweenCharactersSmallerThanMaximum(previous,sublist,0.01f)) { if (checkIfGapSizeBetweenCharactersSmallerThanMaximum(previous, sublist, 0.01f)) {
for (TextPosition t : sublist) { for (TextPosition t : sublist) {
textPositionSequences.get(textPositionSequences.size() - 1).add(t); textPositionSequences.get(textPositionSequences.size() - 1).add(t);
} }
@ -311,17 +308,23 @@ public class PDFLinesTextStripper extends PDFTextStripper {
super.writeString(text); super.writeString(text);
} }
public boolean checkIfCurrentPositionIsToTheRightOfPreviousPosition(int i, List<TextPosition> textPositions) { public boolean checkIfCurrentPositionIsToTheRightOfPreviousPosition(int i, List<TextPosition> textPositions) {
return i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj(); return i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj();
} }
public boolean checkIfSequenceContainsOnlyWhitespaces(List<TextPosition> sublist) { public boolean checkIfSequenceContainsOnlyWhitespaces(List<TextPosition> sublist) {
return !(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0) return !(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
.getUnicode() .getUnicode()
.equals("\t"))); .equals("\t")));
} }
public boolean checkIfGapSizeBetweenCharactersSmallerThanMaximum(RedTextPosition previous, List<TextPosition> sublist, float maximumGapSize) { public boolean checkIfGapSizeBetweenCharactersSmallerThanMaximum(RedTextPosition previous, List<TextPosition> sublist, float maximumGapSize) {
return previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0) return previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < maximumGapSize; .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < maximumGapSize;
} }
@ -334,10 +337,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
@Override @Override
public String getText(PDDocument doc) throws IOException { public String getText(PDDocument doc) throws IOException {
minCharWidth = Integer.MAX_VALUE;
maxCharWidth = 0;
minCharHeight = Integer.MAX_VALUE;
maxCharHeight = 0;
textPositionSequences.clear(); textPositionSequences.clear();
rulings.clear(); rulings.clear();
graphicsPath.clear(); graphicsPath.clear();

View File

@ -47,7 +47,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
@Disabled @Disabled
public void visualizeCraftedDocument() { public void visualizeCraftedDocument() {
String filename = "files/crafted document.pdf"; String filename = "files/1 Abamectin_prr.pdf";
visualizePdf(filename); visualizePdf(filename);
} }

View File

@ -1,5 +1,27 @@
package com.knecon.fforesight.service.layoutparser.server.segmentation; package com.knecon.fforesight.service.layoutparser.server.segmentation;
import static org.assertj.core.api.Assertions.assertThat;
import java.awt.geom.Rectangle2D;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
@ -15,21 +37,8 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService; import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService; import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest; import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
import static org.assertj.core.api.Assertions.assertThat;
import javax.sound.midi.SysexMessage;
public class PdfSegmentationServiceTest extends AbstractTest { public class PdfSegmentationServiceTest extends AbstractTest {
@ -67,6 +76,18 @@ public class PdfSegmentationServiceTest extends AbstractTest {
} }
/**
 * Debugging aid without assertions: builds the classification document for a single-page
 * sample PDF and hands it to {@code toHtml} together with an output path under /tmp.
 * <p>
 * NOTE(review): this runs as a regular test and uses a hard-coded /tmp path; presumably
 * {@code toHtml} writes the file there. Confirm that this is wanted on all build machines.
 */
@Test
public void tablesToHtmlDebugger() throws IOException {

    var samplePage = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
    var loadedPdf = Loader.loadPDF(samplePage.getFile());

    ClassificationDocument parsedDocument = buildClassificationDocument(loadedPdf);

    toHtml(parsedDocument, "/tmp/A20622A izRMS (CZ) fRR Part B9_Page185.html");
}
@Test @Test
@SneakyThrows @SneakyThrows
public void testMapping() { public void testMapping() {
@ -157,7 +178,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
} }
@Test @Test // Nonsense test
public void testDoc56Page170() throws IOException { public void testDoc56Page170() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf"); ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
@ -168,8 +189,25 @@ public class PdfSegmentationServiceTest extends AbstractTest {
validateTable(document, 0, 1, 1, 0, 0); validateTable(document, 0, 1, 1, 0, 0);
validateTable(document, 1, 2, 2, 0, 0); validateTable(document, 1, 2, 2, 0, 0);
validateTable(document, 2, 7, 20, 0, 0); validateTable(document, 2, 6, 20, 0, 0);
validateTable(document, 3, 8, 31, 0, 0); validateTable(document, 3, 7, 31, 0, 0);
}
/**
 * Parses {@code files/211.pdf} into a classification document and checks the tables
 * found in it through {@code validateTableSize} and {@code validateTable}.
 */
@Test
public void testDoc211() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/211.pdf");
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));
// Presumably the expected number of tables in the document; verify against validateTableSize.
validateTableSize(document, 4);
// One check per table (second argument 0..3). The remaining arguments are expected
// values whose meaning is defined by validateTable, which is not visible here.
validateTable(document, 0, 5, 4, 0, 0);
validateTable(document, 1, 5, 15, 14, 0);
validateTable(document, 2, 5, 14, 11, 0);
validateTable(document, 3, 5, 3, 0, 0);
} }
@ -222,6 +260,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile())); ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));
toHtml(document, "/tmp/html.html");
validateTableSize(document, 4); validateTableSize(document, 4);
validateTable(document, 0, 3, 2, 0, 0); validateTable(document, 0, 3, 2, 0, 0);
@ -233,17 +273,29 @@ public class PdfSegmentationServiceTest extends AbstractTest {
/**
 * Parses the single-page sample {@code A20622A izRMS (CZ) fRR Part B9_Page185.pdf} and checks the
 * detected tables. The test is disabled; see the FIXME on {@code @Disabled} for the reason.
 *
 * @throws IOException if the PDF cannot be resolved from the classpath or loaded
 */
@Test @Test
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
public void testDocA20622APartB9Page185() throws IOException { public void testDocA20622APartB9Page185() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf"); ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile())); ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));
validateTableSize(document, 2); validateTableSize(document, 1);
validateTable(document, 0, 5, 5, 0, 0); validateTable(document, 0, 7, 4, 0, 0);
validateTable(document, 1, 11, 9, 0, 0); }
/**
 * Parses the {@code _fixed} variant of the Page185 sample PDF and expects table index 0 to have
 * 7 columns and 4 rows with both empty-cell counters at 0.
 *
 * @throws IOException if the PDF cannot be resolved from the classpath or loaded
 */
@Test
public void testDocA20622APartB9Page185FixedDoc() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf");
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()));
// Presumably asserts the total number of tables found - confirm against validateTableSize.
validateTableSize(document, 1);
validateTable(document, 0, 7, 4, 0, 0);
} }
@ -467,7 +519,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
validateTableSize(document, 1); validateTableSize(document, 1);
validateTable(document, 0, 9, 5, 0, 0); validateTable(document, 0, 9, 5, 2, 0);
} }
@ -486,6 +538,28 @@ public class PdfSegmentationServiceTest extends AbstractTest {
} }
/**
 * Debug helper: writes the HTML rendering of every table in the document to a file.
 *
 * <p>Tables are collected section by section. A separator line is appended whenever a table's
 * page differs from the page of the previous table; because the page tracker starts at 1, tables
 * on page 1 get no separator.
 *
 * <p>The file is encoded as UTF-8 explicitly instead of relying on the platform default charset,
 * so non-ASCII cell text is written identically on every machine.
 *
 * @param document parsed document whose tables are dumped
 * @param filename path of the file to write; an existing file is overwritten
 */
@SneakyThrows
private void toHtml(ClassificationDocument document, String filename) {

    var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
    StringBuilder sb = new StringBuilder();

    int currentPage = 1;
    for (var table : tables) {
        if (currentPage != table.getPage()) {
            currentPage = table.getPage();
            sb.append("---------------------- Page ").append(currentPage).append("--------------\n");
        }
        sb.append("\n\n");
        sb.append(table.getTextAsHtml());
    }

    // Charset is fully qualified so this method needs no additional import.
    byte[] html = sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    try (FileOutputStream fileOutputStream = new FileOutputStream(Path.of(filename).toFile())) {
        fileOutputStream.write(html);
    }
}
private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) { private void validateTable(ClassificationDocument document, int tableIndex, int colCount, int rowCount, int emptyCellsCountCorrect, int emptyCellsCountIncorrect) {
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex); TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(tableIndex);

View File

@ -1,25 +1,16 @@
package com.knecon.fforesight.service.layoutparser.server.services; package com.knecon.fforesight.service.layoutparser.server.services;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import javax.print.Doc;
import org.apache.pdfbox.Loader; import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
@ -35,7 +26,6 @@ import com.knecon.fforesight.service.layoutparser.processor.services.RulingClean
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper;
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService; import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService; import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest; import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
@ -57,26 +47,27 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>(); List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
writeJsons(Path.of(fileName)); writeJsons(Path.of(fileName));
for (PageContents pageContent : pageContents) { for (PageContents pageContent : pageContents) {
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings(), 8, 1)); cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
} }
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName); PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
} }
/**
 * Runs {@code writeJsons} for one single-page sample PDF.
 *
 * <p>NOTE(review): the input is an absolute path inside one developer's Windows home directory,
 * so this test can only find its file on that machine. The file sits under
 * {@code src/test/resources}, so loading it from the classpath looks feasible - confirm.
 */
@Test @Test
@SneakyThrows @SneakyThrows
public void testTableExtractionSingle() { public void testTableExtractionSingle() {
String filename ="C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf";
String filename = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf";
writeJsons(Path.of(filename)); writeJsons(Path.of(filename));
} }
@Test @Test
@SneakyThrows @SneakyThrows
public void testTableExtraction() { public void testTableExtraction() {
LayoutGridService layoutGridService = new LayoutGridService(); LayoutGridService layoutGridService = new LayoutGridService();
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService); ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
@ -92,64 +83,67 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
} }
} }
/**
 * Parses the given PDF twice with {@code LayoutParsingType.REDACT_MANAGER} and empty image/table
 * service responses, maps both document graphs to {@code DocumentData}, and compares their
 * document structures with {@code compareStructures}.
 *
 * <p>When the comparison returns {@code false}, each graph is drawn onto a freshly loaded copy of
 * the PDF and saved as a "before." / "after." file; the target paths are printed to stdout.
 *
 * <p>NOTE(review): both graphs are built by identical calls, so "before" and "after" only differ
 * if parsing itself is not deterministic - confirm that this is the intent. The output directory
 * is a hard-coded temp folder of one Windows user account, so the save step fails elsewhere.
 *
 * @param filename path of the PDF to parse
 */
@SneakyThrows @SneakyThrows
private void writeJsons(Path filename) { private void writeJsons(Path filename) {
Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, Document documentGraphBefore = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(filename.toFile()), Loader.loadPDF(filename.toFile()),
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse())); new TableServiceResponse()));
Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER, Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
Loader.loadPDF(filename.toFile()), Loader.loadPDF(filename.toFile()),
new ImageServiceResponse(), new ImageServiceResponse(),
new TableServiceResponse())); new TableServiceResponse()));
DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore); DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter); DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
if(!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) { if (!compareStructures(documentDataBefore.getDocumentStructure(), documentDataAfter.getDocumentStructure(), filename.getFileName().toString())) {
String tmpFileNameBefore = "C:/Users/YANNIK~1/AppData/Local/Temp/before."+filename.getFileName().toString();; String tmpFileNameBefore = "C:/Users/YANNIK~1/AppData/Local/Temp/before." + filename.getFileName().toString();
System.out.println(tmpFileNameBefore); ;
try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) { System.out.println(tmpFileNameBefore);
try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
PdfDraw.drawDocumentGraph(pdDocument, documentGraphBefore); PdfDraw.drawDocumentGraph(pdDocument, documentGraphBefore);
pdDocument.save(tmpFileNameBefore); pdDocument.save(tmpFileNameBefore);
}
String tmpFileNameAfter = "C:/Users/YANNIK~1/AppData/Local/Temp/after."+filename.getFileName().toString();;
System.out.println(tmpFileNameAfter);
try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
PdfDraw.drawDocumentGraph(pdDocument, documentGraphAfter);
pdDocument.save(tmpFileNameAfter);
}
} }
String tmpFileNameAfter = "C:/Users/YANNIK~1/AppData/Local/Temp/after." + filename.getFileName().toString();
;
System.out.println(tmpFileNameAfter);
try (PDDocument pdDocument = Loader.loadPDF(filename.toFile())) {
PdfDraw.drawDocumentGraph(pdDocument, documentGraphAfter);
pdDocument.save(tmpFileNameAfter);
}
}
} }
@SneakyThrows @SneakyThrows
private boolean compareStructures(DocumentStructure structure1, DocumentStructure structure2, String pdfName) { private boolean compareStructures(DocumentStructure structure1, DocumentStructure structure2, String pdfName) {
List listStructure1 = structure1.streamAllEntries()
List listStructure1 = structure1
.streamAllEntries()
.filter(entryData -> entryData.getType().equals(NodeType.TABLE)) .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
.map(DocumentStructure.EntryData::getProperties) .map(DocumentStructure.EntryData::getProperties)
.map(properties -> { .map(properties -> {
var builder = Table.builder(); var builder = Table.builder();
PropertiesMapper.parseTableProperties(properties, builder); PropertiesMapper.parseTableProperties(properties, builder);
return builder.build(); return builder.build();
}).toList(); })
.toList();
List listStructure2 = structure2 List listStructure2 = structure2.streamAllEntries()
.streamAllEntries()
.filter(entryData -> entryData.getType().equals(NodeType.TABLE)) .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
.map(DocumentStructure.EntryData::getProperties) .map(DocumentStructure.EntryData::getProperties)
.map(properties -> { .map(properties -> {
var builder = Table.builder(); var builder = Table.builder();
PropertiesMapper.parseTableProperties(properties, builder); PropertiesMapper.parseTableProperties(properties, builder);
return builder.build(); return builder.build();
}).toList(); })
.toList();
for (int i = 0; i < listStructure1.size(); i++) {
for(int i = 0; i < listStructure1.size(); i++) {
Table tableNode1 = (Table) listStructure1.get(i); Table tableNode1 = (Table) listStructure1.get(i);
Table tableNode2 = (Table) listStructure2.get(i); Table tableNode2 = (Table) listStructure2.get(i);
if(tableNode1.getNumberOfRows() != tableNode2.getNumberOfRows() || tableNode1.getNumberOfCols() != tableNode2.getNumberOfCols()) { if (tableNode1.getNumberOfRows() != tableNode2.getNumberOfRows() || tableNode1.getNumberOfCols() != tableNode2.getNumberOfCols()) {
return false; return false;
} }
} }