diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java index 566364f..274e1e8 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java @@ -69,11 +69,11 @@ public class HeadlinesGoldStandardIntegrationTest { public void testHeadlineDetection() { List metrics = new ArrayList<>(); - metrics.add(getMetrics("files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf", + metrics.add(getMetrics("files/syngenta/CustomerFiles/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf", "files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json")); - metrics.add(getMetrics("files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf", + metrics.add(getMetrics("files/syngenta/CustomerFiles/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf", "files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json")); - metrics.add(getMetrics("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json")); + metrics.add(getMetrics("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json")); double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0); double recall = 
metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java index 9861a52..7fde740 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java @@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest { @SneakyThrows public void testLayoutParserEndToEnd_RED_8747() { - prepareStorage("files/SinglePages/MergedEntities.pdf"); + prepareStorage("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf"); LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD); LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest); Arrays.stream(finishedEvent.message().split("\n")) diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index e1f078e..d523d2e 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -48,7 +48,7 @@ public 
class DocumentGraphJsonWritingTest extends BuildDocumentTest { @Disabled public void writeJsonForFileTest() { - var resource = new ClassPathResource("files/1 Abamectin_prr.pdf"); + var resource = new ClassPathResource("files/syngenta/CustomerFiles/1 Abamectin_prr.pdf"); writeJsons(resource.getFile().toPath()); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java index 4b28541..f8db426 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphMappingTest.java @@ -27,7 +27,7 @@ public class DocumentGraphMappingTest extends BuildDocumentTest { @SneakyThrows public void testGraphMapping() { - String filename = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"; + String filename = "files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf"; Document document = buildGraph(filename); DocumentData documentData = DocumentDataMapper.toDocumentData(document); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java index 1e98204..71df6e8 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java +++ 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java @@ -28,7 +28,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest { // @Disabled public void visualizeMetolachlor() { - String filename = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf"; + String filename = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf"; visualizePdf(filename); } @@ -48,7 +48,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest { @Disabled public void visualizeCraftedDocument() { - String filename = "files/1 Abamectin_prr.pdf"; + String filename = "files/syngenta/CustomerFiles/1 Abamectin_prr.pdf"; visualizePdf(filename); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 580961e..1b06c03 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { @SneakyThrows public void testViewerDocument() { - String fileName = "files/new/ScrambledTextAfterSorting.pdf"; + String fileName = "files/syngenta/CustomerFiles/SinglePages/ScrambledTextAfterSorting.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; var documentFile = new ClassPathResource(fileName).getFile(); diff --git 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index f6b13f6..e5e7144 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -81,7 +81,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void tablesToHtmlDebugger() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -149,7 +149,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testPDFSegmentationWithComplexTable() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty(); @@ -163,61 +163,130 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testTableExtraction() throws IOException { - ClassPathResource pdfFileResource 
= new ClassPathResource("files/Minimal Examples/Merge Table.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); - assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty(); - TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0); + assertThat(document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .collect(Collectors.toList())).isNotEmpty(); + TablePageBlock firstTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(0); assertThat(firstTable.getColCount()).isEqualTo(8); assertThat(firstTable.getRowCount()).isEqualTo(1); - TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1); + TablePageBlock secondTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(1); assertThat(secondTable.getColCount()).isEqualTo(8); assertThat(secondTable.getRowCount()).isEqualTo(2); - List> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList()); - assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue(); + List> firstTableHeaderCells = firstTable.getRows() + .get(0) + .stream() + .map(Collections::singletonList) + .collect(Collectors.toList()); + assertThat(secondTable.getRows() + .stream() + .allMatch(row -> row.stream() + .map(Cell::getHeaderCells) + 
.toList().equals(firstTableHeaderCells))).isTrue(); } @Test public void testMultiPageMetadataPropagation() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); - assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty(); - TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0); + assertThat(document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .collect(Collectors.toList())).isNotEmpty(); + TablePageBlock firstTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(0); assertThat(firstTable.getColCount()).isEqualTo(9); assertThat(firstTable.getRowCount()).isEqualTo(5); - TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1); + TablePageBlock secondTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(1); assertThat(secondTable.getColCount()).isEqualTo(9); assertThat(secondTable.getRowCount()).isEqualTo(6); - List> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList()); - assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue(); + List> firstTableHeaderCells = firstTable.getRows() + .get(firstTable.getRowCount() - 1) + .stream() + 
.map(Cell::getHeaderCells) + .collect(Collectors.toList()); + assertThat(secondTable.getRows() + .stream() + .allMatch(row -> row.stream() + .map(Cell::getHeaderCells) + .toList().equals(firstTableHeaderCells))).isTrue(); } @Test public void testHeaderCellsForRotatedTable() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); - assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty(); - TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0); + assertThat(document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .collect(Collectors.toList())).isNotEmpty(); + TablePageBlock firstTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(0); assertThat(firstTable.getColCount()).isEqualTo(8); assertThat(firstTable.getRowCount()).isEqualTo(1); - TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1); + TablePageBlock secondTable = document.getSections() + .stream() + .flatMap(paragraph -> paragraph.getTables() + .stream()) + .toList() + .get(1); assertThat(secondTable.getColCount()).isEqualTo(8); assertThat(secondTable.getRowCount()).isEqualTo(6); - List> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList()); - assertThat(secondTable.getRows().stream().allMatch(row -> 
row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue(); + List> firstTableHeaderCells = firstTable.getRows() + .get(0) + .stream() + .map(Collections::singletonList) + .collect(Collectors.toList()); + assertThat(secondTable.getRows() + .stream() + .allMatch(row -> row.stream() + .map(Cell::getHeaderCells) + .toList().equals(firstTableHeaderCells))).isTrue(); } @Test public void testDoc56Page170() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page170_56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -251,7 +320,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testVV931175Page1() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page1_VV-931175.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -292,7 +361,8 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc27Page6() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Page6_27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -312,7 +382,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { 
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently public void testDocA20622APartB9Page185() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_A20622A izRMS (CZ) fRR Part B9.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -325,7 +395,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDocA20622APartB9Page185FixedDoc() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_fixed_A20622A izRMS (CZ) fRR Part B9.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -338,7 +408,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDocA20622APartB7Page123() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page123_A20622A izZRMS (CZ) fRR Part B7.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -357,7 +427,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc77Page111() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/PAge11_77 
Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -373,7 +443,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc95Page532() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page532_95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -386,7 +456,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc52Page175() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page175_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -400,7 +470,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc52Page174() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page174_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -413,7 +483,8 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc19Page35() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 
Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -426,7 +497,8 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc19Page161() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Page161_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -441,7 +513,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { public void testDoc47Page30() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource( - "files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf"); + "files/syngenta/CustomerFiles/SinglePages/Page30_47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -457,7 +529,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { public void testDoc49Page61() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource( - "files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf"); + "files/syngenta/CustomerFiles/SinglePages/Page61_49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active 
substance.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -472,7 +544,8 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc81Page54() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Page54_81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -487,7 +560,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc88Page134() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page134_85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -502,7 +575,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDocThiabendazolePage18() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page18_Thiabendazole DAR Addendum for ED_April_2020.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -519,7 +592,8 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc15Page18() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - 
Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource( + "files/syngenta/CustomerFiles/SinglePages/Page18_15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -534,7 +608,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { public void testDoc28Page23() throws IOException { ClassPathResource pdfFileResource = new ClassPathResource( - "files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf"); + "files/syngenta/CustomerFiles/SinglePages/Page23_28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -549,7 +623,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc24Page17() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page17_24 - SYN549522 - Acute Oral Toxicity - Rats.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -563,7 +637,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testDoc30Page5() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -626,7 +700,7 @@ public class
PdfSegmentationServiceTest extends AbstractTest { @Test public void testT3() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T3 S-Meto_Page29.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T3_Page29_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -640,7 +714,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testT4() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T4 138 IDD0000261736_Page16.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T4_Page16_138 IDD0000261736.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -654,7 +728,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testT5() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); @@ -670,7 +744,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { @Test public void testMergedEntities_Page26() throws IOException { - ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/MergedEntities.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf"); ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); diff --git 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/GapAcrossLinesDetectionServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/GapAcrossLinesDetectionServiceTest.java index db26f8a..2bbc7ee 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/GapAcrossLinesDetectionServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/GapAcrossLinesDetectionServiceTest.java @@ -26,7 +26,7 @@ class GapAcrossLinesDetectionServiceTest { @SneakyThrows public void testGapBasedColumnDetection() { - String filename = "files/invisible_tables/test-two-pages_ocred.pdf"; + String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; - var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf"; + var tmpFileName = "/tmp/" + filename.substring(filename.lastIndexOf('/') + 1) + "_COLUMNS.pdf"; System.out.println("start TextPosition extraction"); long start = System.currentTimeMillis(); @@ -52,7 +52,7 @@ class GapAcrossLinesDetectionServiceTest { @SneakyThrows public void testColumnDetection() { - String filename = "files/invisible_tables/test-two-pages_ocred.pdf"; + String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; - var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf"; + var tmpFileName = "/tmp/" + filename.substring(filename.lastIndexOf('/') + 1) + "_COLUMNS.pdf"; System.out.println("start TextPosition extraction"); long start = System.currentTimeMillis(); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java index 7eb50e2..f16f7e8 100644 ---
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/InvisibleTableDetectionServiceTest.java @@ -26,7 +26,7 @@ class InvisibleTableDetectionServiceTest { @SneakyThrows public void detectInvisibleTableTest() { - String fileName = "files/invisible_tables/test-two-pages_ocred.pdf"; + String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TABLE.pdf").toString(); List pageContents = PageContentExtractor.getSortedPageContents(fileName).stream().map(PageInformationService::build).collect(Collectors.toList()); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/MainBodyTextFrameExtractionServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/MainBodyTextFrameExtractionServiceTest.java index 84c3ba2..1173f30 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/MainBodyTextFrameExtractionServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/MainBodyTextFrameExtractionServiceTest.java @@ -18,7 +18,7 @@ class MainBodyTextFrameExtractionServiceTest { @SneakyThrows public void testMainBodyDetection() { - String fileName = "files/invisible_tables/test-two-pages_ocred.pdf"; + String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; String tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_MAIN_BODY.pdf").toString(); List sortedTextPositionSequence 
= PageContentExtractor.getSortedPageContents(fileName); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java index cbaa195..5ced396 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageContentExtractorTest.java @@ -20,7 +20,7 @@ class PageContentExtractorTest { @SneakyThrows public void testTextPositionSequenceExtraction() { - String fileName = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf"; + String fileName = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf"; var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString(); List textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageInformationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageInformationServiceTest.java index 15d8243..0da7c58 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageInformationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/PageInformationServiceTest.java @@ -20,7 +20,7 @@ class PageInformationServiceTest { @SneakyThrows public void 
testGapDetection() { - String filename = "files/invisible_tables/test-two-pages_ocred.pdf"; + String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf"; System.out.println("start TextPosition extraction"); long start = System.currentTimeMillis(); @@ -43,7 +43,7 @@ class PageInformationServiceTest { @SneakyThrows public void testLineDetection() { - String filename = "files/invisible_tables/test-two-pages_ocred.pdf"; + String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf"; var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf"; System.out.println("start TextPosition extraction"); long start = System.currentTimeMillis(); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java index 00955f0..a66d540 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingCleaningServiceTest.java @@ -46,7 +46,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest { @SneakyThrows public void textRectanglesFromRulingsExtraction() { - String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf"; + String fileName = "files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf"; String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_CELLS.pdf"; List pageContents = PageContentExtractor.getSortedPageContents(fileName); RulingCleaningService rulingCleaningService = new RulingCleaningService(); @@ -66,7 +66,7 @@ 
public class RulingCleaningServiceTest extends BuildDocumentTest { @SneakyThrows public void textRulingExtraction() { - String fileName = "files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf"; + String fileName = "files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf"; String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf"; List pageContents = PageContentExtractor.getSortedPageContents(fileName); RulingCleaningService rulingCleaningService = new RulingCleaningService(); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java index 1374360..af2717b 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/BuildDocumentTest.java @@ -46,7 +46,7 @@ public abstract class BuildDocumentTest extends AbstractTest { @SneakyThrows protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) { - if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) { + if (filename.equals("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) { prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json"); } else { prepareStorage(filename); diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf 
b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf similarity index 100% rename from layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf rename to layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf