Merge branch 'RED-7434' into 'main'

RED-7434 - Remove Section Grid entirely

See merge request fforesight/layout-parser!78
This commit is contained in:
Kilian Schüttler 2023-10-20 10:07:01 +02:00
commit 9abdc6d44d
6 changed files with 1 additions and 164 deletions

View File

@ -36,9 +36,6 @@ public record LayoutParsingRequest(
@Schema(description = "Path where the Simplified Text File will be stored.")//
@NonNull String simplifiedTextStorageId,//
@Schema(description = "Path where the Viewer Document PDF will be stored.")//
@NonNull String viewerDocumentStorageId,//
@Deprecated//
@Schema(description = "Path where the Section Grid will be stored.")//
@NonNull String sectionGridStorageId) {
@NonNull String viewerDocumentStorageId) {
}

View File

@ -35,7 +35,6 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionGridCreatorService;
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService;
@ -65,7 +64,6 @@ public class LayoutParsingPipeline {
private final CvTableParsingAdapter cvTableParsingAdapter;
private final LayoutParsingStorageService layoutParsingStorageService;
private final SectionsBuilderService sectionsBuilderService;
private final SectionGridCreatorService sectionGridCreatorService;
private final TaasClassificationService taasClassificationService;
private final RedactManagerClassificationService redactManagerClassificationService;
private final DocuMineClassificationService docuMineClassificationService;
@ -99,7 +97,6 @@ public class LayoutParsingPipeline {
int numberOfPages = originDocument.getNumberOfPages();
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));

View File

@ -17,7 +17,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
@ -84,11 +83,6 @@ public class LayoutParsingStorageService {
}
/**
 * Stores the given section grid as a JSON object.
 * The tenant id is read from {@link TenantContext} and the target storage id is taken from
 * {@code layoutParsingRequest.sectionGridStorageId()}.
 *
 * @param layoutParsingRequest request providing the section grid storage id
 * @param sectionGrid          the section grid to store
 */
public void storeSectionGrid(LayoutParsingRequest layoutParsingRequest, SectionGrid sectionGrid) {
    var tenantId = TenantContext.getTenantId();
    var sectionGridStorageId = layoutParsingRequest.sectionGridStorageId();
    storageService.storeJSONObject(tenantId, sectionGridStorageId, sectionGrid);
}
public void storeResearchDocumentData(LayoutParsingRequest layoutParsingRequest, ResearchDocumentData researchDocumentData) {

View File

@ -3,7 +3,6 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
@ -25,7 +24,6 @@ public class ClassificationDocument {
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
private boolean headlines;
private SectionGrid sectionGrid = new SectionGrid();
private long rulesVersion;
}

View File

@ -1,146 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.services;
import java.awt.geom.Rectangle2D;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.CellRectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionRectangle;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
import lombok.RequiredArgsConstructor;
/**
 * Builds the {@link SectionGrid} of a {@link Document}: for every page number it lists the
 * bounding boxes of the paragraph, headline, section and table nodes found on that page.
 */
@Service
@RequiredArgsConstructor
public class SectionGridCreatorService {

    /**
     * Collects the bounding boxes of all section, paragraph, headline and table sub nodes of the
     * document and groups them by page number.
     *
     * @param document the document whose sub nodes are inspected
     * @return a new section grid; for each page the rectangles are concatenated in the order
     *         paragraphs, headlines, sections, tables
     */
    public SectionGrid createSectionGrid(Document document) {
        Map<Integer, List<SectionRectangle>> sectionBBox = collectBBoxes(document, NodeType.SECTION);
        Map<Integer, List<SectionRectangle>> paragraphBBox = collectBBoxes(document, NodeType.PARAGRAPH);
        Map<Integer, List<SectionRectangle>> headlineBBox = collectBBoxes(document, NodeType.HEADLINE);
        Map<Integer, List<SectionRectangle>> tableBBox = document.streamAllSubNodesOfType(NodeType.TABLE)
                .map(node -> (Table) node)
                .collect(new TableGridCollector());

        // Each merge folds the second map into the first one, so the left-hand maps are mutated here.
        Map<Integer, List<SectionRectangle>> paragraphsAndHeadlines = mergeMapsByConcatenatingLists(paragraphBBox, headlineBBox);
        Map<Integer, List<SectionRectangle>> sectionsAndTables = mergeMapsByConcatenatingLists(sectionBBox, tableBBox);

        var sectionGrid = new SectionGrid();
        sectionGrid.setRectanglesPerPage(mergeMapsByConcatenatingLists(paragraphsAndHeadlines, sectionsAndTables));
        return sectionGrid;
    }

    /** Streams all sub nodes of the given type and collects their bounding boxes per page number. */
    private static Map<Integer, List<SectionRectangle>> collectBBoxes(Document document, NodeType nodeType) {
        return document.streamAllSubNodesOfType(nodeType)
                .map(SemanticNode::getBBox)
                .collect(new SectionGridCollector());
    }

    /**
     * Shared base of the collectors in this class. The result container is a plain {@link HashMap}
     * from page number to rectangles; subclasses only provide the accumulator.
     *
     * @param <T> type of the stream elements being collected
     */
    private abstract static class GridCollector<T> implements Collector<T, Map<Integer, List<SectionRectangle>>, Map<Integer, List<SectionRectangle>>> {

        @Override
        public Supplier<Map<Integer, List<SectionRectangle>>> supplier() {
            return HashMap::new;
        }

        @Override
        public Function<Map<Integer, List<SectionRectangle>>, Map<Integer, List<SectionRectangle>>> finisher() {
            return Function.identity();
        }

        @Override
        public BinaryOperator<Map<Integer, List<SectionRectangle>>> combiner() {
            return SectionGridCreatorService::mergeMapsByConcatenatingLists;
        }

        @Override
        public Set<Characteristics> characteristics() {
            // CONCURRENT must not be declared: the container is a HashMap, which is not safe for
            // concurrent accumulation from several threads into the same instance.
            return Set.of(Characteristics.IDENTITY_FINISH, Characteristics.UNORDERED);
        }
    }

    /** Collects tables into one rectangle per page the table is on, including its cell rectangles. */
    private static class TableGridCollector extends GridCollector<Table> {

        @Override
        public BiConsumer<Map<Integer, List<SectionRectangle>>, Table> accumulator() {
            return (rectanglesPerPage, table) -> {
                var pages = table.getPages();
                // The number of pages of the table is passed as numberOfParts of every rectangle.
                int numberOfParts = pages.size();
                pages.forEach(page -> rectanglesPerPage.merge(page.getNumber(),
                        List.of(toSectionRectangle(table, page, numberOfParts)),
                        SectionGridCreatorService::concatLists));
            };
        }

        /**
         * Converts the part of a table located on the given page into a section rectangle.
         *
         * @param table         the table to convert
         * @param page          the page whose part of the table is converted
         * @param numberOfParts value passed through to the created rectangle
         * @return rectangle of the table on that page together with the rectangles of its cells on that page
         */
        private static SectionRectangle toSectionRectangle(Table table, Page page, int numberOfParts) {
            // NOTE(review): assumes the table's bounding box map has an entry for this page — confirm.
            Rectangle2D rect = table.getBBox().get(page);
            List<CellRectangle> tableCellRectangles = table.streamTableCells()
                    .map(TableCell::getBBox)
                    .map(bBoxPerPage -> bBoxPerPage.get(page))
                    // cells without a bounding box on this page are left out
                    .filter(Objects::nonNull)
                    .map(cellRect -> new CellRectangle(new Point((float) cellRect.getX(), (float) cellRect.getY()),
                            (float) cellRect.getWidth(),
                            (float) cellRect.getHeight()))
                    .toList();
            // The fourth argument is hard-coded to 1; presumably the part index — TODO confirm.
            return new SectionRectangle(new Point((float) rect.getX(), (float) rect.getY()),
                    (float) rect.getWidth(),
                    (float) rect.getHeight(),
                    1,
                    numberOfParts,
                    tableCellRectangles);
        }
    }

    /** Collects per-page bounding boxes of a node into one rectangle per page, without cell rectangles. */
    private static class SectionGridCollector extends GridCollector<Map<Page, Rectangle2D>> {

        @Override
        public BiConsumer<Map<Integer, List<SectionRectangle>>, Map<Page, Rectangle2D>> accumulator() {
            return (rectanglesPerPage, bBoxPerPage) -> {
                // The number of pages the node has a bounding box on is passed as numberOfParts.
                int numberOfParts = bBoxPerPage.size();
                bBoxPerPage.forEach((page, rectangle) -> rectanglesPerPage.merge(page.getNumber(),
                        List.of(toSectionRectangle(rectangle, numberOfParts)),
                        SectionGridCreatorService::concatLists));
            };
        }

        /** Converts a bounding box into a section rectangle with no cell rectangles ({@code null}). */
        private static SectionRectangle toSectionRectangle(Rectangle2D rect, int numberOfParts) {
            return new SectionRectangle(new Point((float) rect.getX(), (float) rect.getY()),
                    (float) rect.getWidth(),
                    (float) rect.getHeight(),
                    1,
                    numberOfParts,
                    null);
        }
    }

    /**
     * Merges {@code mapToMerge} into {@code mapToKeep}. Lists stored under the same page number are
     * concatenated, with the entries of {@code mapToKeep} first.
     *
     * @param mapToKeep  map that is modified in place and returned
     * @param mapToMerge map whose entries are added; it is not modified
     * @return {@code mapToKeep}
     */
    private static Map<Integer, List<SectionRectangle>> mergeMapsByConcatenatingLists(Map<Integer, List<SectionRectangle>> mapToKeep,
                                                                                      Map<Integer, List<SectionRectangle>> mapToMerge) {
        mapToMerge.forEach((page, rectangles) -> mapToKeep.merge(page, rectangles, SectionGridCreatorService::concatLists));
        return mapToKeep;
    }

    /** Returns a new unmodifiable list holding the elements of {@code l1} followed by those of {@code l2}. */
    private static List<SectionRectangle> concatLists(List<SectionRectangle> l1, List<SectionRectangle> l2) {
        return Stream.concat(l1.stream(), l2.stream()).toList();
    }
}

View File

@ -51,7 +51,6 @@ public abstract class AbstractTest {
protected final static String POSITION_FILE_ID = "positions";
protected final static String PAGES_FILE_ID = "pages";
protected final static String TENANT_ID = "tenant";
protected final static String SECTION_GRID_ID = "section";
protected final static String VIEWER_DOCUMENT_ID = "viewer";
protected final static String SIMPLIFIED_ID = "simplified";
@ -68,7 +67,6 @@ public abstract class AbstractTest {
.positionBlockFileStorageId(POSITION_FILE_ID)
.pageFileStorageId(PAGES_FILE_ID)
.simplifiedTextStorageId(SIMPLIFIED_ID)
.sectionGridStorageId(SECTION_GRID_ID)
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
.build();
}
@ -114,7 +112,6 @@ public abstract class AbstractTest {
.positionBlockFileStorageId(POSITION_FILE_ID)
.pageFileStorageId(PAGES_FILE_ID)
.simplifiedTextStorageId(SIMPLIFIED_ID)
.sectionGridStorageId(SECTION_GRID_ID)
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
.build();
}