From c7f5b4a2808eb2e02a109980c6d24ccf93ce37ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20G=C3=B6ckel?= Date: Tue, 18 Aug 2020 20:35:17 +0200 Subject: [PATCH] Add unit test for table structure requirements --- .../PdfSegmentationServiceTest.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java new file mode 100644 index 00000000..537fa91b --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java @@ -0,0 +1,67 @@ +package com.iqser.red.service.redaction.v1.server.segmentation; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.kie.api.runtime.KieContainer; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.junit4.SpringRunner; + +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import 
com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;

/**
 * Spring Boot test that runs {@link PdfSegmentationService} against a PDF fixture from the
 * classpath and checks the dimensions of the first table found in the parsed document.
 */
@SpringBootTest
@RunWith(SpringRunner.class)
public class PdfSegmentationServiceTest {

    /** Column count the first table of the fixture is expected to report. */
    private static final int EXPECTED_COLUMN_COUNT = 6;

    /** Row count the first table of the fixture is expected to report. */
    private static final int EXPECTED_ROW_COUNT = 13;

    @Autowired
    private PdfSegmentationService pdfSegmentationService;

    // NOTE(review): the three services below are injected but never referenced by the test in
    // this class; presumably kept so the context fails fast if the beans are missing - confirm
    // or remove.
    @Autowired
    private RulingCleaningService rulingCleaningService;

    @Autowired
    private TableExtractionService tableExtractionService;

    @Autowired
    private BlockificationService blockificationService;

    // Registered as a Mockito mock in the application context; never stubbed or verified here.
    @MockBean
    private KieContainer kieContainer;


    /**
     * Parses {@code files/Minimal Examples/Spanning Cells.pdf} and asserts that at least one
     * table is found, that the first table reports 6 columns and 13 rows, and that the sizes
     * of all of its rows add up to 6 * 13 cells.
     *
     * @throws IOException if the fixture cannot be opened or loaded as a PDF document
     */
    @Test
    public void testPDFSegmentationWithComplexTable() throws IOException {

        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");

        try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
            Document document = pdfSegmentationService.parseDocument(pdDocument);

            // Flatten the tables of every paragraph once and reuse the list for all assertions.
            List<Table> tables = document.getParagraphs()
                    .stream()
                    .flatMap(paragraph -> paragraph.getTables().stream())
                    .collect(Collectors.toList());
            assertThat(tables).isNotEmpty();

            Table table = tables.get(0);
            assertThat(table.getColCount()).isEqualTo(EXPECTED_COLUMN_COUNT);
            assertThat(table.getRowCount()).isEqualTo(EXPECTED_ROW_COUNT);

            // The row sizes must add up to a full grid of EXPECTED_COLUMN_COUNT x EXPECTED_ROW_COUNT.
            int totalCellCount = table.getRows().stream().mapToInt(List::size).sum();
            assertThat(totalCellCount).isEqualTo(EXPECTED_COLUMN_COUNT * EXPECTED_ROW_COUNT);
        }
    }

}