|
|
|
|
@ -81,7 +81,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void tablesToHtmlDebugger() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -149,7 +149,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testPDFSegmentationWithComplexTable() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
@ -163,61 +163,130 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testTableExtraction() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
|
|
|
|
assertThat(document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(0);
|
|
|
|
|
assertThat(firstTable.getColCount()).isEqualTo(8);
|
|
|
|
|
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
|
|
|
|
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
|
|
|
|
TablePageBlock secondTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(1);
|
|
|
|
|
assertThat(secondTable.getColCount()).isEqualTo(8);
|
|
|
|
|
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
|
|
|
|
.get(0)
|
|
|
|
|
.stream()
|
|
|
|
|
.map(Collections::singletonList)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows()
|
|
|
|
|
.stream()
|
|
|
|
|
.allMatch(row -> row.stream()
|
|
|
|
|
.map(Cell::getHeaderCells)
|
|
|
|
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testMultiPageMetadataPropagation() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
|
|
|
|
assertThat(document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(0);
|
|
|
|
|
assertThat(firstTable.getColCount()).isEqualTo(9);
|
|
|
|
|
assertThat(firstTable.getRowCount()).isEqualTo(5);
|
|
|
|
|
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
|
|
|
|
TablePageBlock secondTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(1);
|
|
|
|
|
assertThat(secondTable.getColCount()).isEqualTo(9);
|
|
|
|
|
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
|
|
|
|
.get(firstTable.getRowCount() - 1)
|
|
|
|
|
.stream()
|
|
|
|
|
.map(Cell::getHeaderCells)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows()
|
|
|
|
|
.stream()
|
|
|
|
|
.allMatch(row -> row.stream()
|
|
|
|
|
.map(Cell::getHeaderCells)
|
|
|
|
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testHeaderCellsForRotatedTable() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
|
|
|
|
assertThat(document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.collect(Collectors.toList())).isNotEmpty();
|
|
|
|
|
TablePageBlock firstTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(0);
|
|
|
|
|
assertThat(firstTable.getColCount()).isEqualTo(8);
|
|
|
|
|
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
|
|
|
|
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
|
|
|
|
TablePageBlock secondTable = document.getSections()
|
|
|
|
|
.stream()
|
|
|
|
|
.flatMap(paragraph -> paragraph.getTables()
|
|
|
|
|
.stream())
|
|
|
|
|
.toList()
|
|
|
|
|
.get(1);
|
|
|
|
|
assertThat(secondTable.getColCount()).isEqualTo(8);
|
|
|
|
|
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
|
|
|
|
.get(0)
|
|
|
|
|
.stream()
|
|
|
|
|
.map(Collections::singletonList)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertThat(secondTable.getRows()
|
|
|
|
|
.stream()
|
|
|
|
|
.allMatch(row -> row.stream()
|
|
|
|
|
.map(Cell::getHeaderCells)
|
|
|
|
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc56Page170() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page170_56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -251,7 +320,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testVV931175Page1() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page1_VV-931175.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -292,7 +361,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc27Page6() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page6_27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -312,7 +382,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
|
|
|
|
|
public void testDocA20622APartB9Page185() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_A20622A izRMS (CZ) fRR Part B9.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -325,7 +395,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDocA20622APartB9Page185FixedDoc() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_fixed_A20622A izRMS (CZ) fRR Part B9.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -338,7 +408,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDocA20622APartB7Page123() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page123_A20622A izZRMS (CZ) fRR Part B7.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -357,7 +427,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc77Page111() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/PAge11_77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -373,7 +443,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc95Page532() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page532_95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -386,7 +456,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc52Page175() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page175_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -400,7 +470,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc52Page174() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page174_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -413,7 +483,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc19Page35() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -426,7 +497,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc19Page161() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page161_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -441,7 +513,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
public void testDoc47Page30() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf");
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page30_47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -457,7 +529,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
public void testDoc49Page61() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf");
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page61_49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -472,7 +544,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc81Page54() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page54_81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -487,7 +560,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc88Page134() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page134_85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -502,7 +575,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDocThiabendazolePage18() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page18_Thiabendazole DAR Addendum for ED_April_2020.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -519,7 +592,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc15Page18() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page18_15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -534,7 +608,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
public void testDoc28Page23() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
|
|
|
|
"files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf");
|
|
|
|
|
"files/syngenta/CustomerFiles/SinglePages/Page23_28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -549,7 +623,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc24Page17() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page17_24 - SYN549522 - Acute Oral Toxicity - Rats.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -563,7 +637,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testDoc30Page5() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -626,7 +700,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testT3() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T3 S-Meto_Page29.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T3_Page29_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -640,7 +714,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testT4() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T4 138 IDD0000261736_Page16.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T4_Page16_138 IDD0000261736.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -654,7 +728,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testT5() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
@ -670,7 +744,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|
|
|
|
@Test
|
|
|
|
|
public void testMergedEntities_Page26() throws IOException {
|
|
|
|
|
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/MergedEntities.pdf");
|
|
|
|
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
|
|
|
|
|
|
|
|
|
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
|
|
|
|
|
|
|
|
|
|