RED-8747 - Entities not merged properly - fp

- fix typo: pass usedHorizonalRulings instead of usedVerticalRulings twice to isSplitByRuling
- add validate table test
This commit is contained in:
Corina Olariu 2024-04-09 12:14:57 +03:00
parent f185b13f2b
commit 014eba9fc3
4 changed files with 15 additions and 2 deletions

View File

@ -59,7 +59,7 @@ public class RedactManagerBlockificationService {
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj();
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedVerticalRulings, usedVerticalRulings);
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedHorizonalRulings, usedVerticalRulings);
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) {

View File

@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
@SneakyThrows
public void testLayoutParserEndToEnd_RED_8747() {
prepareStorage("files/localTests/MergedEntities.pdf");
prepareStorage("files/SinglePages/MergedEntities.pdf");
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
Arrays.stream(finishedEvent.message().split("\n"))

View File

@ -667,6 +667,19 @@ public class PdfSegmentationServiceTest extends AbstractTest {
}
/**
 * Regression test added with RED-8747: builds a classification document from
 * the single-page sample {@code files/SinglePages/MergedEntities.pdf} and
 * checks the tables detected in it.
 *
 * @throws IOException if the classpath resource cannot be resolved to a file
 */
@Test
public void testMergedEntities_Page26() throws IOException {
    ClassificationDocument classificationDocument = buildClassificationDocument(
            new ClassPathResource("files/SinglePages/MergedEntities.pdf").getFile());

    // Presumably asserts that exactly one table was found - confirm against validateTableSize.
    validateTableSize(classificationDocument, 1);
    // NOTE(review): the meaning of (0, 6, 6, 0, 0) is defined by validateTable, which is
    // not visible here; likely table index plus expected dimensions - confirm.
    validateTable(classificationDocument, 0, 6, 6, 0, 0);
}
@SneakyThrows
private void toHtml(ClassificationDocument document, String filename) {