RED-8701 - Move files to customer data repositories
- update unit tests with the new path to submodules for customer files
This commit is contained in:
parent
6b1b5eab84
commit
20e4e5ddff
@ -69,11 +69,11 @@ public class HeadlinesGoldStandardIntegrationTest {
|
|||||||
public void testHeadlineDetection() {
|
public void testHeadlineDetection() {
|
||||||
|
|
||||||
List<Metrics> metrics = new ArrayList<>();
|
List<Metrics> metrics = new ArrayList<>();
|
||||||
metrics.add(getMetrics("files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
|
metrics.add(getMetrics("files/syngenta/CustomerFiles/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf",
|
||||||
"files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json"));
|
"files/headlineTest/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1)_REDACTION_LOG.json"));
|
||||||
metrics.add(getMetrics("files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
|
metrics.add(getMetrics("files/syngenta/CustomerFiles/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf",
|
||||||
"files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json"));
|
"files/headlineTest/91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23_REDACTION_LOG.json"));
|
||||||
metrics.add(getMetrics("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));
|
metrics.add(getMetrics("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf", "files/headlineTest/S-Metolachlor_RAR_01_Volume_1_2018-09-06_REDACTION_LOG.json"));
|
||||||
|
|
||||||
double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0);
|
double precision = metrics.stream().mapToDouble(Metrics::getPrecision).average().orElse(1.0);
|
||||||
double recall = metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0);
|
double recall = metrics.stream().mapToDouble(Metrics::getRecall).average().orElse(1.0);
|
||||||
|
|||||||
@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testLayoutParserEndToEnd_RED_8747() {
|
public void testLayoutParserEndToEnd_RED_8747() {
|
||||||
|
|
||||||
prepareStorage("files/SinglePages/MergedEntities.pdf");
|
prepareStorage("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
|
||||||
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD);
|
||||||
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
|
LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
|
||||||
Arrays.stream(finishedEvent.message().split("\n"))
|
Arrays.stream(finishedEvent.message().split("\n"))
|
||||||
|
|||||||
@ -48,7 +48,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
|
|||||||
@Disabled
|
@Disabled
|
||||||
public void writeJsonForFileTest() {
|
public void writeJsonForFileTest() {
|
||||||
|
|
||||||
var resource = new ClassPathResource("files/1 Abamectin_prr.pdf");
|
var resource = new ClassPathResource("files/syngenta/CustomerFiles/1 Abamectin_prr.pdf");
|
||||||
writeJsons(resource.getFile().toPath());
|
writeJsons(resource.getFile().toPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -27,7 +27,7 @@ public class DocumentGraphMappingTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testGraphMapping() {
|
public void testGraphMapping() {
|
||||||
|
|
||||||
String filename = "files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf";
|
String filename = "files/syngenta/CustomerFiles/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf";
|
||||||
|
|
||||||
Document document = buildGraph(filename);
|
Document document = buildGraph(filename);
|
||||||
DocumentData documentData = DocumentDataMapper.toDocumentData(document);
|
DocumentData documentData = DocumentDataMapper.toDocumentData(document);
|
||||||
|
|||||||
@ -28,7 +28,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
|
|||||||
// @Disabled
|
// @Disabled
|
||||||
public void visualizeMetolachlor() {
|
public void visualizeMetolachlor() {
|
||||||
|
|
||||||
String filename = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
|
String filename = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
|
||||||
visualizePdf(filename);
|
visualizePdf(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,7 +48,7 @@ public class DocumentGraphVisualizationTest extends BuildDocumentTest {
|
|||||||
@Disabled
|
@Disabled
|
||||||
public void visualizeCraftedDocument() {
|
public void visualizeCraftedDocument() {
|
||||||
|
|
||||||
String filename = "files/1 Abamectin_prr.pdf";
|
String filename = "files/syngenta/CustomerFiles/1 Abamectin_prr.pdf";
|
||||||
visualizePdf(filename);
|
visualizePdf(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -27,7 +27,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testViewerDocument() {
|
public void testViewerDocument() {
|
||||||
|
|
||||||
String fileName = "files/new/ScrambledTextAfterSorting.pdf";
|
String fileName = "files/syngenta/CustomerFiles/SinglePages/ScrambledTextAfterSorting.pdf";
|
||||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||||
|
|
||||||
var documentFile = new ClassPathResource(fileName).getFile();
|
var documentFile = new ClassPathResource(fileName).getFile();
|
||||||
|
|||||||
@ -81,7 +81,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void tablesToHtmlDebugger() throws IOException {
|
public void tablesToHtmlDebugger() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -149,7 +149,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testPDFSegmentationWithComplexTable() throws IOException {
|
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Spanning Cells - Page131_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||||
@ -163,61 +163,130 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testTableExtraction() throws IOException {
|
public void testTableExtraction() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Merge Table - Page5_26 A8637C - EU AIR3 - LCP Section 10 - Ecotoxicological studies on the plant protection product - Reference list.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
assertThat(document.getSections()
|
||||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.collect(Collectors.toList())).isNotEmpty();
|
||||||
|
TablePageBlock firstTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(0);
|
||||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
TablePageBlock secondTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(1);
|
||||||
assertThat(secondTable.getColCount()).isEqualTo(8);
|
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||||
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
||||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
.get(0)
|
||||||
|
.stream()
|
||||||
|
.map(Collections::singletonList)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
assertThat(secondTable.getRows()
|
||||||
|
.stream()
|
||||||
|
.allMatch(row -> row.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultiPageMetadataPropagation() throws IOException {
|
public void testMultiPageMetadataPropagation() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Merge Multi Page Table - Page4_Page5_51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
assertThat(document.getSections()
|
||||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.collect(Collectors.toList())).isNotEmpty();
|
||||||
|
TablePageBlock firstTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(0);
|
||||||
assertThat(firstTable.getColCount()).isEqualTo(9);
|
assertThat(firstTable.getColCount()).isEqualTo(9);
|
||||||
assertThat(firstTable.getRowCount()).isEqualTo(5);
|
assertThat(firstTable.getRowCount()).isEqualTo(5);
|
||||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
TablePageBlock secondTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(1);
|
||||||
assertThat(secondTable.getColCount()).isEqualTo(9);
|
assertThat(secondTable.getColCount()).isEqualTo(9);
|
||||||
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
||||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(firstTable.getRowCount() - 1).stream().map(Cell::getHeaderCells).collect(Collectors.toList());
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
.get(firstTable.getRowCount() - 1)
|
||||||
|
.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
assertThat(secondTable.getRows()
|
||||||
|
.stream()
|
||||||
|
.allMatch(row -> row.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHeaderCellsForRotatedTable() throws IOException {
|
public void testHeaderCellsForRotatedTable() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Rotated Table Headers - Page4_65 Mesotrione - EU AIR3 - LCA Section 1 Supplement Reference List.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
assertThat(document.getSections()
|
||||||
TablePageBlock firstTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.collect(Collectors.toList())).isNotEmpty();
|
||||||
|
TablePageBlock firstTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(0);
|
||||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||||
TablePageBlock secondTable = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(1);
|
TablePageBlock secondTable = document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables()
|
||||||
|
.stream())
|
||||||
|
.toList()
|
||||||
|
.get(1);
|
||||||
assertThat(secondTable.getColCount()).isEqualTo(8);
|
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||||
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
assertThat(secondTable.getRowCount()).isEqualTo(6);
|
||||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows().get(0).stream().map(Collections::singletonList).collect(Collectors.toList());
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||||
assertThat(secondTable.getRows().stream().allMatch(row -> row.stream().map(Cell::getHeaderCells).toList().equals(firstTableHeaderCells))).isTrue();
|
.get(0)
|
||||||
|
.stream()
|
||||||
|
.map(Collections::singletonList)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
assertThat(secondTable.getRows()
|
||||||
|
.stream()
|
||||||
|
.allMatch(row -> row.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.toList().equals(firstTableHeaderCells))).isTrue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDoc56Page170() throws IOException {
|
public void testDoc56Page170() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21_Page170.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page170_56 Fludioxonil_RAR_12_Volume_3CA_B-7_2018-02-21.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -251,7 +320,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testVV931175Page1() throws IOException {
|
public void testVV931175Page1() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/VV-931175_Page1.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page1_VV-931175.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -292,7 +361,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc27Page6() throws IOException {
|
public void testDoc27Page6() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product_Page6.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Page6_27 A8637C - EU AIR3 - MCP Section 1 - Identity of the plant protection product.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -312,7 +382,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
|
@Disabled // FIXME Fake Redactions leads to more cells, no solution for this currently
|
||||||
public void testDocA20622APartB9Page185() throws IOException {
|
public void testDocA20622APartB9Page185() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_A20622A izRMS (CZ) fRR Part B9.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -325,7 +395,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDocA20622APartB9Page185FixedDoc() throws IOException {
|
public void testDocA20622APartB9Page185FixedDoc() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izRMS (CZ) fRR Part B9_Page185_fixed.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page185_fixed_A20622A izRMS (CZ) fRR Part B9.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -338,7 +408,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDocA20622APartB7Page123() throws IOException {
|
public void testDocA20622APartB7Page123() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/A20622A izZRMS (CZ) fRR Part B7_Page123.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page123_A20622A izZRMS (CZ) fRR Part B7.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -357,7 +427,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc77Page111() throws IOException {
|
public void testDoc77Page111() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04_Page11.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/PAge11_77 Pirimicarb_RAR_08_Volume_3CA_B-6_2017-12-04.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -373,7 +443,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc95Page532() throws IOException {
|
public void testDoc95Page532() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10_Page532.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page532_95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -386,7 +456,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc52Page175() throws IOException {
|
public void testDoc52Page175() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page175.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page175_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -400,7 +470,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc52Page174() throws IOException {
|
public void testDoc52Page174() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21_Page174.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page174_52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -413,7 +483,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc19Page35() throws IOException {
|
public void testDoc19Page35() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -426,7 +497,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc19Page161() throws IOException {
|
public void testDoc19Page161() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page161.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Page161_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -441,7 +513,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
public void testDoc47Page30() throws IOException {
|
public void testDoc47Page30() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
"files/SinglePages/47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance_Page30.pdf");
|
"files/syngenta/CustomerFiles/SinglePages/Page30_47 Cyprodinil - EU AIR3 - MCA Section 5 Supplement - Toxicological and metabolism studies on the active substance.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -457,7 +529,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
public void testDoc49Page61() throws IOException {
|
public void testDoc49Page61() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
"files/SinglePages/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance_Page61.pdf");
|
"files/syngenta/CustomerFiles/SinglePages/Page61_49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -472,7 +544,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc81Page54() throws IOException {
|
public void testDoc81Page54() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04_Page54.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Page54_81 Pirimicarb_RAR_20_Volume_3CP_A10788A (_Pirimor_)_B-9_2017-12-04.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -487,7 +560,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc88Page134() throws IOException {
|
public void testDoc88Page134() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26_Page134.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page134_85 Pydiflumetofen_DAR_08_Volume_3CA_B-6_2017-07-26.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -502,7 +575,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDocThiabendazolePage18() throws IOException {
|
public void testDocThiabendazolePage18() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Thiabendazole DAR Addendum for ED_April_2020_Page18.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page18_Thiabendazole DAR Addendum for ED_April_2020.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -519,7 +592,8 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc15Page18() throws IOException {
|
public void testDoc15Page18() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat_Page18.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
|
"files/syngenta/CustomerFiles/SinglePages/Page18_15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -534,7 +608,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
public void testDoc28Page23() throws IOException {
|
public void testDoc28Page23() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(
|
ClassPathResource pdfFileResource = new ClassPathResource(
|
||||||
"files/SinglePages/28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product_Page23.pdf");
|
"files/syngenta/CustomerFiles/SinglePages/Page23_28 A8637C - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -549,7 +623,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc24Page17() throws IOException {
|
public void testDoc24Page17() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page17_24 - SYN549522 - Acute Oral Toxicity - Rats.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -563,7 +637,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testDoc30Page5() throws IOException {
|
public void testDoc30Page5() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/30 - Dicamba - Acute Oral Toxicity - Rats_Page5.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/Page5_30 - Dicamba - Acute Oral Toxicity - Rats.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -626,7 +700,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testT3() throws IOException {
|
public void testT3() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T3 S-Meto_Page29.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T3_Page29_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -640,7 +714,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testT4() throws IOException {
|
public void testT4() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T4 138 IDD0000261736_Page16.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T4_Page16_138 IDD0000261736.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -654,7 +728,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testT5() throws IOException {
|
public void testT5() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/T5 VV-640252-Page16.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
@ -670,7 +744,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testMergedEntities_Page26() throws IOException {
|
public void testMergedEntities_Page26() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/MergedEntities.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/syngenta/CustomerFiles/SinglePages/Page26_fRR A23317A PI0015600 CEU core part B6 - CZ.pdf");
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
|
|||||||
@ -26,7 +26,7 @@ class GapAcrossLinesDetectionServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testGapBasedColumnDetection() {
|
public void testGapBasedColumnDetection() {
|
||||||
|
|
||||||
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
|
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
|
||||||
System.out.println("start TextPosition extraction");
|
System.out.println("start TextPosition extraction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
@ -52,7 +52,7 @@ class GapAcrossLinesDetectionServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testColumnDetection() {
|
public void testColumnDetection() {
|
||||||
|
|
||||||
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
|
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_COLUMNS.pdf";
|
||||||
System.out.println("start TextPosition extraction");
|
System.out.println("start TextPosition extraction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
|
|||||||
@ -26,7 +26,7 @@ class InvisibleTableDetectionServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void detectInvisibleTableTest() {
|
public void detectInvisibleTableTest() {
|
||||||
|
|
||||||
String fileName = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TABLE.pdf").toString();
|
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TABLE.pdf").toString();
|
||||||
List<PageInformation> pageContents = PageContentExtractor.getSortedPageContents(fileName).stream().map(PageInformationService::build).collect(Collectors.toList());
|
List<PageInformation> pageContents = PageContentExtractor.getSortedPageContents(fileName).stream().map(PageInformationService::build).collect(Collectors.toList());
|
||||||
|
|
||||||
|
|||||||
@ -18,7 +18,7 @@ class MainBodyTextFrameExtractionServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testMainBodyDetection() {
|
public void testMainBodyDetection() {
|
||||||
|
|
||||||
String fileName = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String fileName = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
String tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_MAIN_BODY.pdf").toString();
|
String tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_MAIN_BODY.pdf").toString();
|
||||||
List<PageContents> sortedTextPositionSequence = PageContentExtractor.getSortedPageContents(fileName);
|
List<PageContents> sortedTextPositionSequence = PageContentExtractor.getSortedPageContents(fileName);
|
||||||
|
|
||||||
|
|||||||
@ -20,7 +20,7 @@ class PageContentExtractorTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testTextPositionSequenceExtraction() {
|
public void testTextPositionSequenceExtraction() {
|
||||||
|
|
||||||
String fileName = "files/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
|
String fileName = "files/syngenta/CustomerFiles/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (4).pdf";
|
||||||
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString();
|
var tmpFileName = Path.of("/tmp/").resolve(Path.of(fileName).getFileName() + "_TEXT_POSITION_SEQUENCES.pdf").toString();
|
||||||
|
|
||||||
List<PageContents> textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName);
|
List<PageContents> textPositionPerPage = PageContentExtractor.getSortedPageContents(fileName);
|
||||||
|
|||||||
@ -20,7 +20,7 @@ class PageInformationServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testGapDetection() {
|
public void testGapDetection() {
|
||||||
|
|
||||||
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
|
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
|
||||||
System.out.println("start TextPosition extraction");
|
System.out.println("start TextPosition extraction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
@ -43,7 +43,7 @@ class PageInformationServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testLineDetection() {
|
public void testLineDetection() {
|
||||||
|
|
||||||
String filename = "files/invisible_tables/test-two-pages_ocred.pdf";
|
String filename = "files/basf/CustomerFiles/invisible_tables_test-two-pages_ocred.pdf";
|
||||||
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
|
var tmpFileName = "/tmp/" + filename.split("/")[2] + "_GAPS.pdf";
|
||||||
System.out.println("start TextPosition extraction");
|
System.out.println("start TextPosition extraction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
|
|||||||
@ -46,7 +46,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void textRectanglesFromRulingsExtraction() {
|
public void textRectanglesFromRulingsExtraction() {
|
||||||
|
|
||||||
String fileName = "files/SinglePages/T5 VV-640252-Page16.pdf";
|
String fileName = "files/syngenta/CustomerFiles/SinglePages/T5_Page16_VV-640252.pdf";
|
||||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_CELLS.pdf";
|
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_CELLS.pdf";
|
||||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||||
@ -66,7 +66,7 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void textRulingExtraction() {
|
public void textRulingExtraction() {
|
||||||
|
|
||||||
String fileName = "files/SinglePages/19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017_Page35.pdf";
|
String fileName = "files/syngenta/CustomerFiles/SinglePages/Page35_19 Chlorothalonil RAR 08 Volume 3CA B 6b metabolites Oct 2017.pdf";
|
||||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||||
|
|||||||
@ -46,7 +46,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) {
|
protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) {
|
||||||
|
|
||||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
if (filename.equals("files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) {
|
||||||
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||||
} else {
|
} else {
|
||||||
prepareStorage(filename);
|
prepareStorage(filename);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user