RED-8642: Use LineMode from cv-analysis-service instead of table cell mode

* fix and test case for me
This commit is contained in:
maverickstuder 2024-03-08 16:25:16 +01:00
parent ad1e44ca5c
commit 78fb6b825b
5 changed files with 36 additions and 4 deletions

View File

@ -37,9 +37,12 @@ public class CvTableParsingAdapter {
List<Ruling> cvParsedRulings = new ArrayList<>();
tableLines.forEach(l -> cvParsedRulings.add(new Ruling(new Point2D.Double(l.getX0(), l.getY0()), new Point2D.Double(l.getX1(), l.getY1()))));
tableLines.forEach(l -> cvParsedRulings.add(new Ruling(new Point2D.Double(l.getX1() * pageInfo.getWidth(), (1 - l.getY1()) * pageInfo.getHeight()),
new Point2D.Double(l.getX2() * pageInfo.getWidth(), (1 - l.getY2()) * pageInfo.getHeight()))));
return cvParsedRulings.stream().filter(ruling -> ruling.getWidth() < pageInfo.getWidth() * 0.98 && ruling.getHeight() < pageInfo.getHeight() * 0.98).toList();
return cvParsedRulings.stream()
.filter(ruling -> ruling.getWidth() < pageInfo.getWidth() * 0.98 && ruling.getHeight() < pageInfo.getHeight() * 0.98)
.toList();
}
}

View File

@ -11,9 +11,10 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class TableLine {
// Holds the float coordinates of one table line segment. No accessors are declared
// in this class body; presumably they are generated by Lombok — confirm against the
// annotations on the class.

// NOTE(review): CvTableParsingAdapter has one Ruling construction that reads
// (x0, y0) -> (x1, y1) as-is, without any scaling — confirm whether x0/y0 are still needed.
private float x0;
private float y0;
// CvTableParsingAdapter also builds a Ruling from (x1, y1) -> (x2, y2), multiplying x by the
// page width and (1 - y) by the page height. Presumably these are page-relative fractions
// with the y-axis flipped relative to the PDF coordinate system — TODO confirm.
private float x1;
private float y1;
// Second endpoint of the scaled segment described above.
private float x2;
private float y2;
}

View File

@ -28,13 +28,16 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
import lombok.SneakyThrows;
@ -52,6 +55,9 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Autowired
private SectionsBuilderService sectionsBuilderService;
@Autowired
private CvTableParsingAdapter cvTableParsingAdapter;
@SneakyThrows
public ClassificationDocument buildClassificationDocument(File originDocument, TableServiceResponse tableServiceResponse) {
@ -91,9 +97,11 @@ public class PdfSegmentationServiceTest extends AbstractTest {
@Test
@Disabled
public void tablesToHtmlDebuggerWithCVResponse() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.pdf");
// the format has changed and this is not up-to-date
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.TABLES.json");
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
@ -103,6 +111,25 @@ public class PdfSegmentationServiceTest extends AbstractTest {
}
/**
 * Debugging aid for the line-based CV table response of {@code VV-331340-first100.pdf}.
 *
 * <p>Reads the PDF and its {@code TABLES.json} companion from the classpath, builds the
 * classification document, passes it to {@code toHtml} with the target
 * {@code /tmp/VV-331340-first100.html}, and hands the per-page rulings produced by the
 * {@link CvTableParsingAdapter} to {@code PdfDraw.drawLinesPerPage} with the target
 * {@code /tmp/VV-331340-first100.pdf_LINES.pdf}. The method contains no assertions; it
 * only fails if one of the steps throws.
 *
 * @throws IOException if a classpath resource cannot be read or parsed
 */
@Test
public void tablesToHtmlDebuggerWithLinesCVResponse() throws IOException {

    final String pdfClasspathLocation = "files/cv_tables/VV-331340-first100.pdf";
    final ClassPathResource pdfResource = new ClassPathResource(pdfClasspathLocation);
    final ClassPathResource tablesJsonResource = new ClassPathResource("files/cv_tables/VV-331340-first100.TABLES.json");

    // Target for the line overlay: "/tmp/" + the PDF's bare file name + "_LINES.pdf".
    final String pdfBaseName = Path.of(pdfClasspathLocation).getFileName().toString();
    final String linesOutputPath = "/tmp/" + pdfBaseName + "_LINES.pdf";

    TableServiceResponse cvResponse = objectMapper.readValue(tablesJsonResource.getInputStream(), TableServiceResponse.class);

    ClassificationDocument classificationDocument = buildClassificationDocument(pdfResource.getFile(), cvResponse);
    Map<Integer, List<Ruling>> pageToRulings = cvTableParsingAdapter.buildCvParsedRulingsPerPage(cvResponse);

    toHtml(classificationDocument, "/tmp/VV-331340-first100.html");

    // The drawing helper receives the rulings as a list of per-page lists, in the map's value order.
    List<List<Ruling>> rulingsByPage = pageToRulings.values().stream().toList();
    PdfDraw.drawLinesPerPage(pdfClasspathLocation, rulingsByPage, linesOutputPath);
}
@Disabled
@Test
public void testScanRotationBorderIsIgnored() throws IOException {