Merge branch 'DM-589' into 'main'
DM-589: Filter wrongly detected cells that are borders resulting from rotation at scanning. See merge request fforesight/layout-parser!83
This commit is contained in:
commit
e2bcf971c9
@ -8,6 +8,7 @@ import java.util.Map;
|
|||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.PageInfo;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
|
|
||||||
@ -24,24 +25,26 @@ public class CvTableParsingAdapter {
|
|||||||
Map<Integer, List<TableCells>> tableCells = new HashMap<>();
|
Map<Integer, List<TableCells>> tableCells = new HashMap<>();
|
||||||
tableServiceResponse.getData()
|
tableServiceResponse.getData()
|
||||||
.forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>())
|
.forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>())
|
||||||
.addAll(convertTableCells(tableData.getTableCells())));
|
.addAll(convertTableCells(tableData.getTableCells(), tableData.getPageInfo())));
|
||||||
|
|
||||||
return tableCells;
|
return tableCells;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Collection<TableCells> convertTableCells(List<TableCells> tableCells) {
|
private Collection<TableCells> convertTableCells(List<TableCells> tableCells, PageInfo pageInfo) {
|
||||||
|
|
||||||
List<TableCells> cvParsedTableCells = new ArrayList<>();
|
List<TableCells> cvParsedTableCells = new ArrayList<>();
|
||||||
|
|
||||||
tableCells.forEach(t -> cvParsedTableCells.add(TableCells.builder()
|
tableCells.stream()
|
||||||
.y0(t.getY0())
|
.filter(cell -> cell.getWidth() < pageInfo.getWidth() * 0.98 && cell.getHeight() < pageInfo.getHeight() * 0.98)
|
||||||
.x1(t.getX1())
|
.forEach(t -> cvParsedTableCells.add(TableCells.builder()
|
||||||
.y1(t.getY1())
|
.y0(t.getY0())
|
||||||
.x0(t.getX0())
|
.x1(t.getX1())
|
||||||
.width(t.getWidth())
|
.y1(t.getY1())
|
||||||
.height(t.getHeight())
|
.x0(t.getX0())
|
||||||
.build()));
|
.width(t.getWidth())
|
||||||
|
.height(t.getHeight())
|
||||||
|
.build()));
|
||||||
|
|
||||||
return cvParsedTableCells;
|
return cvParsedTableCells;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -61,12 +61,13 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
private SectionsBuilderService sectionsBuilderService;
|
private SectionsBuilderService sectionsBuilderService;
|
||||||
|
|
||||||
|
|
||||||
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
@SneakyThrows
|
||||||
|
public ClassificationDocument buildClassificationDocument(PDDocument originDocument, TableServiceResponse tableServiceResponse) {
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
ClassificationDocument classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||||
originDocument,
|
originDocument,
|
||||||
new ImageServiceResponse(),
|
new ImageServiceResponse(),
|
||||||
new TableServiceResponse());
|
tableServiceResponse);
|
||||||
|
|
||||||
redactManagerClassificationService.classifyDocument(classificationDocument);
|
redactManagerClassificationService.classifyDocument(classificationDocument);
|
||||||
|
|
||||||
@ -76,6 +77,13 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public ClassificationDocument buildClassificationDocument(PDDocument originDocument) {
|
||||||
|
|
||||||
|
return buildClassificationDocument(originDocument, new TableServiceResponse());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void tablesToHtmlDebugger() throws IOException {
|
public void tablesToHtmlDebugger() throws IOException {
|
||||||
|
|
||||||
@ -88,6 +96,39 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void tablesToHtmlDebuggerWithCVResponse() throws IOException {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.pdf");
|
||||||
|
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.TABLES.json");
|
||||||
|
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
||||||
|
|
||||||
|
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()), tableServiceResponse);
|
||||||
|
|
||||||
|
toHtml(document, "/tmp/ScanRotationBorder.html");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testScanRotationBorderIsIgnored() throws IOException {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.pdf");
|
||||||
|
ClassPathResource cvTablesResource = new ClassPathResource("files/cv_tables/ScanRotationBorder.TABLES.json");
|
||||||
|
var tableServiceResponse = objectMapper.readValue(cvTablesResource.getInputStream(), TableServiceResponse.class);
|
||||||
|
|
||||||
|
ClassificationDocument document = buildClassificationDocument(Loader.loadPDF(pdfFileResource.getFile()), tableServiceResponse);
|
||||||
|
assertThat(document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).collect(Collectors.toList())).isNotEmpty();
|
||||||
|
var tables = document.getSections().stream().flatMap(paragraph -> paragraph.getTables().stream()).toList();
|
||||||
|
|
||||||
|
// Quality of the table parsing is not good, because the file is rotated at scanning.
|
||||||
|
// We only assert that the table border is not the page border.
|
||||||
|
tables.forEach(table -> {
|
||||||
|
assertThat(table.getMinX()).isGreaterThan(75);
|
||||||
|
assertThat(table.getMaxX()).isLessThan(512);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testMapping() {
|
public void testMapping() {
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user