* fixed tests
This commit is contained in:
parent
a6ba66b1aa
commit
84148d3b6e
@ -45,8 +45,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/2Tables.pdf";
|
||||
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/2Tables.lines.pdf";
|
||||
String fileName = "files/bdr/notMergedParagraphs.pdf";
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
LayoutGridService layoutGridService = new LayoutGridService();
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
||||
Document document = buildGraph(fileName, LayoutParsingType.TAAS);
|
||||
@ -55,48 +55,6 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testTableViewerDocument() {
|
||||
|
||||
String fileName = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\VV-931175_Page1.pdf";
|
||||
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/page1.lines.pdf";
|
||||
Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
Loader.loadPDF(Path.of(fileName).toFile()),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse()));
|
||||
LayoutGridService layoutGridService = new LayoutGridService();
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
|
||||
try (var pdDocument = Loader.loadPDF(Path.of(fileName).toFile()); var out = new FileOutputStream(tmpFileName)) {
|
||||
viewerDocumentService.createViewerDocument(pdDocument, documentGraph, out, true);
|
||||
}
|
||||
//durch rows
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
||||
int emptyCellCount = 0;
|
||||
List listStructure2 = documentData.getDocumentStructure()
|
||||
.streamAllEntries()
|
||||
.filter(entryData -> entryData.getType().equals(NodeType.TABLE))
|
||||
.map(DocumentStructure.EntryData::getProperties)
|
||||
.map(properties -> {
|
||||
var builder = Table.builder();
|
||||
PropertiesMapper.parseTableProperties(properties, builder);
|
||||
return builder.build();
|
||||
}).toList();
|
||||
for(int i = 0; i < listStructure2.size(); i++) {
|
||||
emptyCellCount = ((Table) listStructure2.get(i)).getEmptyCells();
|
||||
}
|
||||
|
||||
System.out.println("Empty cells "+emptyCellCount);
|
||||
|
||||
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(Path.of(fileName).toFile()));
|
||||
TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
|
||||
int emptyCellsFoundFound = table.getRows().stream().flatMap(List::stream).toList().stream().filter(f -> f.toString().equals("")).toList().size();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
System.out.println(row.toString());
|
||||
}
|
||||
System.out.println("Actual number of empty rows: "+emptyCellsFoundFound);
|
||||
}
|
||||
|
||||
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
||||
|
||||
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
|
||||
@ -40,26 +40,15 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void textRulingExtraction() {
|
||||
|
||||
String fileName = "/files/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf";
|
||||
String lineFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.after.pdf";
|
||||
String fileName = "files/211.pdf";
|
||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
||||
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
|
||||
writeJsons(Path.of(fileName));
|
||||
for (PageContents pageContent : pageContents) {
|
||||
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
|
||||
}
|
||||
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testTableExtractionSingle() {
|
||||
|
||||
String filename = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf";
|
||||
writeJsons(Path.of(filename));
|
||||
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user