* fixed tests
This commit is contained in:
parent
a6ba66b1aa
commit
84148d3b6e
@ -45,8 +45,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testViewerDocument() {
|
public void testViewerDocument() {
|
||||||
|
|
||||||
String fileName = "files/2Tables.pdf";
|
String fileName = "files/bdr/notMergedParagraphs.pdf";
|
||||||
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/2Tables.lines.pdf";
|
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||||
LayoutGridService layoutGridService = new LayoutGridService();
|
LayoutGridService layoutGridService = new LayoutGridService();
|
||||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
||||||
Document document = buildGraph(fileName, LayoutParsingType.TAAS);
|
Document document = buildGraph(fileName, LayoutParsingType.TAAS);
|
||||||
@ -55,48 +55,6 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
@SneakyThrows
|
|
||||||
public void testTableViewerDocument() {
|
|
||||||
|
|
||||||
String fileName = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\VV-931175_Page1.pdf";
|
|
||||||
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/page1.lines.pdf";
|
|
||||||
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
|
||||||
Loader.loadPDF(Path.of(fileName).toFile()),
|
|
||||||
new ImageServiceResponse(),
|
|
||||||
new TableServiceResponse()));
|
|
||||||
LayoutGridService layoutGridService = new LayoutGridService();
|
|
||||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
|
||||||
try (var pdDocument = Loader.loadPDF(Path.of(fileName).toFile()); var out = new FileOutputStream(tmpFileName)) {
|
|
||||||
viewerDocumentService.createViewerDocument(pdDocument, documentGraph, out, true);
|
|
||||||
}
|
|
||||||
//durch rows
|
|
||||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
|
||||||
int emptyCellCount = 0;
|
|
||||||
List listStructure2 = documentData.getDocumentStructure()
|
|
||||||
.streamAllEntries()
|
|
||||||
.filter(entryData -> entryData.getType().equals(NodeType.TABLE))
|
|
||||||
.map(DocumentStructure.EntryData::getProperties)
|
|
||||||
.map(properties -> {
|
|
||||||
var builder = Table.builder();
|
|
||||||
PropertiesMapper.parseTableProperties(properties, builder);
|
|
||||||
return builder.build();
|
|
||||||
}).toList();
|
|
||||||
for(int i = 0; i < listStructure2.size(); i++) {
|
|
||||||
emptyCellCount = ((Table) listStructure2.get(i)).getEmptyCells();
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.println("Empty cells "+emptyCellCount);
|
|
||||||
|
|
||||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(Path.of(fileName).toFile()));
|
|
||||||
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
|
||||||
int emptyCellsFoundFound = table.getRows().stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().equals("")).toList().size();
|
|
||||||
for (List<Cell> row : table.getRows()) {
|
|
||||||
System.out.println(row.toString());
|
|
||||||
}
|
|
||||||
System.out.println("Actual number of empty rows: "+emptyCellsFoundFound);
|
|
||||||
}
|
|
||||||
|
|
||||||
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||||
|
|||||||
@ -40,26 +40,15 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void textRulingExtraction() {
|
public void textRulingExtraction() {
|
||||||
|
|
||||||
String fileName = "/files/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf";
|
String fileName = "files/211.pdf";
|
||||||
String lineFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.after.pdf";
|
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||||
|
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
||||||
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
|
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
|
||||||
writeJsons(Path.of(fileName));
|
|
||||||
for (PageContents pageContent : pageContents) {
|
for (PageContents pageContent : pageContents) {
|
||||||
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
|
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
|
||||||
}
|
}
|
||||||
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
@SneakyThrows
|
|
||||||
public void testTableExtractionSingle() {
|
|
||||||
|
|
||||||
String filename = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf";
|
|
||||||
writeJsons(Path.of(filename));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user