RED-8747 - Entities not merged properly - fp

- fix typo
- add validate table test
This commit is contained in:
Corina Olariu 2024-04-09 12:14:57 +03:00
parent f185b13f2b
commit 014eba9fc3
4 changed files with 15 additions and 2 deletions

View File

@ -59,7 +59,7 @@ public class RedactManagerBlockificationService {
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj(); boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX; boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj(); boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedVerticalRulings, usedVerticalRulings); boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedHorizonalRulings, usedVerticalRulings);
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir()); boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) { if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) {

View File

@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
@SneakyThrows @SneakyThrows
public void testLayoutParserEndToEnd_RED_8747() { public void testLayoutParserEndToEnd_RED_8747() {
prepareStorage("files/localTests/MergedEntities.pdf"); prepareStorage("files/SinglePages/MergedEntities.pdf");
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD); LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest); LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
Arrays.stream(finishedEvent.message().split("\n")) Arrays.stream(finishedEvent.message().split("\n"))

View File

@ -667,6 +667,19 @@ public class PdfSegmentationServiceTest extends AbstractTest {
} }
/**
 * Builds a classification document from the {@code files/SinglePages/MergedEntities.pdf}
 * classpath fixture and runs the table validators against it.
 *
 * <p>NOTE(review): presumably asserts that exactly one table is detected and that the table at
 * index 0 has the expected 6 x 6 layout; the meaning of the remaining {@code validateTable}
 * arguments is not visible here - confirm against the helper signatures.
 *
 * @throws IOException if the fixture cannot be resolved to a file
 */
@Test
public void testMergedEntities_Page26() throws IOException {

    ClassificationDocument mergedEntitiesDocument = buildClassificationDocument(
            new ClassPathResource("files/SinglePages/MergedEntities.pdf").getFile());

    validateTableSize(mergedEntitiesDocument, 1);
    validateTable(mergedEntitiesDocument, 0, 6, 6, 0, 0);
}
@SneakyThrows @SneakyThrows
private void toHtml(ClassificationDocument document, String filename) { private void toHtml(ClassificationDocument document, String filename) {