RED-8701 - Move files to customer data repositories

- update unit tests to load customer files from their new paths inside the customer-data submodules
This commit is contained in:
Corina Olariu 2024-04-22 13:37:27 +03:00
parent 6b1b5eab84
commit 20e4e5ddff
15 changed files with 138 additions and 64 deletions

View File

@ -69,11 +69,11 @@ public class HeadlinesGoldStandardIntegrationTest {
public void testHeadlineDetection() {
List<Metrics> metrics = new ArrayList<>();
metrics.add(getMetrics("files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
metrics.add(getMetrics("files/syngenta/CustomerFiles/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
"files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json"));
metrics.add(getMetrics("files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
metrics.add(getMetrics("files/syngenta/CustomerFiles/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
"files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json"));
metrics.add(getMetrics("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));
metrics.add(getMetrics("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));
double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0);
double recall = metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0);

View File

@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
@SneakyThrows
public void testLayoutParserEndToEnd_RED_8747() {
prepareStorage("files/SinglePages/MergedEntities.pdf");
prepareStorage("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
Arrays.stream(finishedEvent.message().split("\n"))

View File

@ -48,7 +48,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
@Disabled
public void writeJsonForFileTest() {
var resource = new ClassPathResource("files/1 Abamectin_prr.pdf");
var resource = new ClassPathResource("files/syngenta/CustomerFiles/1 Abamectin_prr.pdf");
writeJsons(resource.getFile().toPath());
}

View File

@ -27,7 +27,7 @@ public class DocumentGraphMappingTest extends BuildDocumentTest {
@SneakyThrows
public void testGraphMapping() {
String filename = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf";
String filename = "files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf";
Document document = buildGraph(filename);
DocumentData documentData = DocumentDataMapper.toDocumentData(document);

View File

@ -28,7 +28,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
// @Disabled
public void visualizeMetolachlor() {
String filename = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
String filename = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
visualizePdf(filename);
}
@ -48,7 +48,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
@Disabled
public void visualizeCraftedDocument() {
String filename = "files/1 Abamectin_prr.pdf";
String filename = "files/syngenta/CustomerFiles/1 Abamectin_prr.pdf";
visualizePdf(filename);
}

View File

@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
@SneakyThrows
public void testViewerDocument() {
String fileName = "files/new/ScrambledTextAfterSorting.pdf";
String fileName = "files/syngenta/CustomerFiles/SinglePages/ScrambledTextAfterSorting.pdf";
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
var documentFile = new ClassPathResource(fileName).getFile();

View File

@ -81,7 +81,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void tablesToHtmlDebugger() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -149,7 +149,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testPDFSegmentationWithComplexTable() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
@ -163,61 +163,130 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testTableExtraction() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
assertThat(document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(8);
assertThat(firstTable.getRowCount()).isEqualTo(1);
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
TablePageBlock secondTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(1);
assertThat(secondTable.getColCount()).isEqualTo(8);
assertThat(secondTable.getRowCount()).isEqualTo(2);
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
.get(0)
.stream()
.map(Collections::singletonList)
.collect(Collectors.toList());
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.toList().equals(firstTableHeaderCells))).isTrue();
}
@Test
public void testMultiPageMetadataPropagation() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
assertThat(document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(9);
assertThat(firstTable.getRowCount()).isEqualTo(5);
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
TablePageBlock secondTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(1);
assertThat(secondTable.getColCount()).isEqualTo(9);
assertThat(secondTable.getRowCount()).isEqualTo(6);
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
.get(firstTable.getRowCount() - 1)
.stream()
.map(Cell::getHeaderCells)
.collect(Collectors.toList());
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.toList().equals(firstTableHeaderCells))).isTrue();
}
@Test
public void testHeaderCellsForRotatedTable() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
assertThat(document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.collect(Collectors.toList())).isNotEmpty();
TablePageBlock firstTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(0);
assertThat(firstTable.getColCount()).isEqualTo(8);
assertThat(firstTable.getRowCount()).isEqualTo(1);
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
TablePageBlock secondTable = document.getSections()
.stream()
.flatMap(paragraph -> paragraph.getTables()
.stream())
.toList()
.get(1);
assertThat(secondTable.getColCount()).isEqualTo(8);
assertThat(secondTable.getRowCount()).isEqualTo(6);
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
.get(0)
.stream()
.map(Collections::singletonList)
.collect(Collectors.toList());
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.toList().equals(firstTableHeaderCells))).isTrue();
}
@Test
public void testDoc56Page170() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page170_56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -251,7 +320,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testVV931175Page1() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page1_VV-931175.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -292,7 +361,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc27Page6() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Page6_27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -312,7 +382,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
public void testDocA20622APartB9Page185() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_A20622A izRMS (CZ) fRR Part B9.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -325,7 +395,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDocA20622APartB9Page185FixedDoc() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_fixed_A20622A izRMS (CZ) fRR Part B9.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -338,7 +408,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDocA20622APartB7Page123() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page123_A20622A izZRMS (CZ) fRR Part B7.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -357,7 +427,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc77Page111() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/PAge11_77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -373,7 +443,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc95Page532() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page532_95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -386,7 +456,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc52Page175() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page175_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -400,7 +470,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc52Page174() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page174_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -413,7 +483,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc19Page35() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -426,7 +497,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc19Page161() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Page161_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -441,7 +513,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
public void testDoc47Page30() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource(
"files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf");
"files/syngenta/CustomerFiles/SinglePages/Page30_47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -457,7 +529,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
public void testDoc49Page61() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource(
"files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf");
"files/syngenta/CustomerFiles/SinglePages/Page61_49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -472,7 +544,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc81Page54() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Page54_81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -487,7 +560,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc88Page134() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page134_85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -502,7 +575,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDocThiabendazolePage18() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page18_Thiabendazole DAR Addendum for ED_April_2020.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -519,7 +592,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc15Page18() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(
"files/syngenta/CustomerFiles/SinglePages/Page18_15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -534,7 +608,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
public void testDoc28Page23() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource(
"files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf");
"files/syngenta/CustomerFiles/SinglePages/Page23_28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -549,7 +623,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc24Page17() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page17_24 - SYN549522 - Acute Oral Toxicity - Rats.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -563,7 +637,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testDoc30Page5() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -626,7 +700,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testT3() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T3 S-Meto_Page29.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T3_Page29_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -640,7 +714,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testT4() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T4 138 IDD0000261736_Page16.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T4_Page16_138 IDD0000261736.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -654,7 +728,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testT5() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
@ -670,7 +744,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
public void testMergedEntities_Page26() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/MergedEntities.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());

View File

@ -26,7 +26,7 @@ class GapAcrossLinesDetectionServiceTest {
@SneakyThrows
public void testGapBasedColumnDetection() {
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
System.out.println("start TextPosition extraction");
long start = System.currentTimeMillis();
@ -52,7 +52,7 @@ class GapAcrossLinesDetectionServiceTest {
@SneakyThrows
public void testColumnDetection() {
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
System.out.println("start TextPosition extraction");
long start = System.currentTimeMillis();

View File

@ -26,7 +26,7 @@ class InvisibleTableDetectionServiceTest {
@SneakyThrows
public void detectInvisibleTableTest() {
String fileName = "files/invisible_tables/test-two-pages_ocred.pdf";
String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TABLE.pdf").toString();
List<PageInformation> pageContents = PageContentExtractor.getSortedPageContents(fileName).stream().map(PageInformationService::build).collect(Collectors.toList());

View File

@ -18,7 +18,7 @@ class MainBodyTextFrameExtractionServiceTest {
@SneakyThrows
public void testMainBodyDetection() {
String fileName = "files/invisible_tables/test-two-pages_ocred.pdf";
String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
String tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_MAIN_BODY.pdf").toString();
List<PageContents> sortedTextPositionSequence = PageContentExtractor.getSortedPageContents(fileName);

View File

@ -20,7 +20,7 @@ class PageContentExtractorTest {
@SneakyThrows
public void testTextPositionSequenceExtraction() {
String fileName = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
String fileName = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString();
List<PageContents> textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName);

View File

@ -20,7 +20,7 @@ class PageInformationServiceTest {
@SneakyThrows
public void testGapDetection() {
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
System.out.println("start TextPosition extraction");
long start = System.currentTimeMillis();
@ -43,7 +43,7 @@ class PageInformationServiceTest {
@SneakyThrows
public void testLineDetection() {
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
System.out.println("start TextPosition extraction");
long start = System.currentTimeMillis();

View File

@ -46,7 +46,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
@SneakyThrows
public void textRectanglesFromRulingsExtraction() {
String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
String fileName = "files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf";
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_CELLS.pdf";
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
RulingCleaningService rulingCleaningService = new RulingCleaningService();
@ -66,7 +66,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
@SneakyThrows
public void textRulingExtraction() {
String fileName = "files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf";
String fileName = "files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf";
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
RulingCleaningService rulingCleaningService = new RulingCleaningService();

View File

@ -46,7 +46,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
@SneakyThrows
protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) {
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
if (filename.equals("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
} else {
prepareStorage(filename);