* fixed tests

This commit is contained in:
yhampe 2023-11-16 07:51:08 +01:00
parent a6ba66b1aa
commit 84148d3b6e
2 changed files with 5 additions and 58 deletions

View File

@ -45,8 +45,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {
@SneakyThrows
public void testViewerDocument() {
String fileName = "files/2Tables.pdf";
String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/2Tables.lines.pdf";
String fileName = "files/bdr/notMergedParagraphs.pdf";
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
LayoutGridService layoutGridService = new LayoutGridService();
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
Document document = buildGraph(fileName, LayoutParsingType.TAAS);
@ -55,48 +55,6 @@ public class ViewerDocumentTest extends BuildDocumentTest {
}
}
/**
 * Parses a single-page PDF into a document graph, writes a viewer document for it to a
 * temporary file, and prints two numbers for manual comparison: the empty-cell count read
 * back from the mapped table properties, and the number of cells of the first parsed table
 * whose string representation is empty.
 *
 * NOTE(review): both paths below are absolute paths on one developer's Windows machine, so
 * this test can only run there. They are kept unchanged here; confirm the intended
 * test-resource location before making them portable.
 */
@Test
@SneakyThrows
public void testTableViewerDocument() {
    String fileName = "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\VV-931175_Page1.pdf";
    String tmpFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/page1.lines.pdf";
    var pdfFile = Path.of(fileName).toFile();

    // Every loaded PDF is opened in try-with-resources so it is closed even when parsing
    // or writing throws; the original left two of the three loaded documents open.
    try (var layoutSource = Loader.loadPDF(pdfFile)) {
        Document documentGraph = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
                layoutSource,
                new ImageServiceResponse(),
                new TableServiceResponse()));

        LayoutGridService layoutGridService = new LayoutGridService();
        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService);
        try (var pdDocument = Loader.loadPDF(pdfFile); var out = new FileOutputStream(tmpFileName)) {
            viewerDocumentService.createViewerDocument(pdDocument, documentGraph, out, true);
        }

        // Rebuild a Table from the properties of every TABLE entry in the mapped structure.
        DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
        List<Table> mappedTables = documentData.getDocumentStructure()
                .streamAllEntries()
                .filter(entryData -> entryData.getType().equals(NodeType.TABLE))
                .map(DocumentStructure.EntryData::getProperties)
                .map(properties -> {
                    var builder = Table.builder();
                    PropertiesMapper.parseTableProperties(properties, builder);
                    return builder.build();
                })
                .toList();

        // Iterate over the tables.
        // NOTE(review): the value is overwritten on each iteration, not summed, so only the
        // last table's count is printed — confirm this is intended.
        int emptyCellCount = 0;
        for (Table mappedTable : mappedTables) {
            emptyCellCount = mappedTable.getEmptyCells();
        }
        System.out.println("Empty cells "+emptyCellCount);
    }

    try (var classificationSource = Loader.loadPDF(pdfFile)) {
        ClassificationDocument document = buildClassificationDocument(classificationSource);
        // Only the first table found across all sections is inspected.
        TablePageBlock table = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList().get(0);
        long emptyCellsFound = table.getRows().stream()
                .flatMap(List::stream)
                .filter(cell -> cell.toString().isEmpty())
                .count();
        for (List<Cell> row : table.getRows()) {
            System.out.println(row.toString());
        }
        // NOTE(review): the message says "rows" but the value counts cells; text kept as-is.
        System.out.println("Actual number of empty rows: "+emptyCellsFound);
    }
}
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,

View File

@ -40,26 +40,15 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
@SneakyThrows
public void textRulingExtraction() {
String fileName = "/files/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf";
String lineFileName = "C:/Users/YANNIK~1/AppData/Local/Temp/102 S-Metolachlor_RAR_02_Volume_2_2018-09-06.after.pdf";
String fileName = "files/211.pdf";
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
RulingCleaningService rulingCleaningService = new RulingCleaningService();
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
List<CleanRulings> cleanRulingsPerPage = new LinkedList<>();
writeJsons(Path.of(fileName));
for (PageContents pageContent : pageContents) {
cleanRulingsPerPage.add(rulingCleaningService.getCleanRulings(Collections.emptyList(), pageContent.getRulings()));
}
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
}
/**
 * Runs {@code writeJsons} on one single-page PDF.
 *
 * NOTE(review): the path is an absolute location on one developer's Windows machine, so
 * this test can only run there.
 */
@Test
@SneakyThrows
public void testTableExtractionSingle() {
    writeJsons(Path.of(
            "C:\\Users\\YannikHampe\\repos\\layout-parser\\layoutparser-service\\layoutparser-service-server\\src\\test\\resources\\files\\SinglePages\\24 - SYN549522 - Acute Oral Toxicity - Rats_Page17.pdf"));
}