Merge branch 'RED-7434' into 'main'
RED-7434 - Remove Section Grid entirely. See merge request fforesight/layout-parser!78.
This commit is contained in:
commit
9abdc6d44d
@ -36,9 +36,6 @@ public record LayoutParsingRequest(
|
||||
@Schema(description = "Path where the Simplified Text File will be stored.")//
|
||||
@NonNull String simplifiedTextStorageId,//
|
||||
@Schema(description = "Path where the Viewer Document PDF will be stored.")//
|
||||
@NonNull String viewerDocumentStorageId,//
|
||||
@Deprecated//
|
||||
@Schema(description = "Path where the Section Grid will be stored.")//
|
||||
@NonNull String sectionGridStorageId) {
|
||||
@NonNull String viewerDocumentStorageId) {
|
||||
|
||||
}
|
||||
|
||||
@ -35,7 +35,6 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.tab
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionGridCreatorService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.TableExtractionService;
|
||||
@ -65,7 +64,6 @@ public class LayoutParsingPipeline {
|
||||
private final CvTableParsingAdapter cvTableParsingAdapter;
|
||||
private final LayoutParsingStorageService layoutParsingStorageService;
|
||||
private final SectionsBuilderService sectionsBuilderService;
|
||||
private final SectionGridCreatorService sectionGridCreatorService;
|
||||
private final TaasClassificationService taasClassificationService;
|
||||
private final RedactManagerClassificationService redactManagerClassificationService;
|
||||
private final DocuMineClassificationService docuMineClassificationService;
|
||||
@ -99,7 +97,6 @@ public class LayoutParsingPipeline {
|
||||
|
||||
int numberOfPages = originDocument.getNumberOfPages();
|
||||
|
||||
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
|
||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
||||
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
|
||||
|
||||
|
||||
@ -17,7 +17,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
|
||||
@ -84,11 +83,6 @@ public class LayoutParsingStorageService {
|
||||
}
|
||||
|
||||
|
||||
public void storeSectionGrid(LayoutParsingRequest layoutParsingRequest, SectionGrid sectionGrid) {
|
||||
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.sectionGridStorageId(), sectionGrid);
|
||||
}
|
||||
|
||||
|
||||
public void storeResearchDocumentData(LayoutParsingRequest layoutParsingRequest, ResearchDocumentData researchDocumentData) {
|
||||
|
||||
|
||||
@ -3,7 +3,6 @@ package com.knecon.fforesight.service.layoutparser.processor.model;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.StringFrequencyCounter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.UnclassifiedText;
|
||||
|
||||
@ -25,7 +24,6 @@ public class ClassificationDocument {
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
private boolean headlines;
|
||||
|
||||
private SectionGrid sectionGrid = new SectionGrid();
|
||||
private long rulesVersion;
|
||||
|
||||
}
|
||||
|
||||
@ -1,146 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.BinaryOperator;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collector;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.CellRectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionRectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class SectionGridCreatorService {
|
||||
|
||||
public SectionGrid createSectionGrid(Document document) {
|
||||
|
||||
Map<Integer, List<SectionRectangle>> sectionBBox = document.streamAllSubNodesOfType(NodeType.SECTION).map(SemanticNode::getBBox).collect(new SectionGridCollector());
|
||||
Map<Integer, List<SectionRectangle>> paragraphBBox = document.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBBox).collect(new SectionGridCollector());
|
||||
Map<Integer, List<SectionRectangle>> headlineBBox = document.streamAllSubNodesOfType(NodeType.HEADLINE).map(SemanticNode::getBBox).collect(new SectionGridCollector());
|
||||
Map<Integer, List<SectionRectangle>> tableBBox = document.streamAllSubNodesOfType(NodeType.TABLE).map(node -> (Table) node).collect(new TableGridCollector());
|
||||
var sectionGrid = new SectionGrid();
|
||||
|
||||
sectionGrid.setRectanglesPerPage(mergeMapsByConcatenatingLists(//
|
||||
mergeMapsByConcatenatingLists(paragraphBBox, headlineBBox), //
|
||||
mergeMapsByConcatenatingLists(sectionBBox, tableBBox)));
|
||||
|
||||
return sectionGrid;
|
||||
}
|
||||
|
||||
|
||||
private static abstract class GridCollector<T> implements Collector<T, Map<Integer, List<SectionRectangle>>, Map<Integer, List<SectionRectangle>>> {
|
||||
|
||||
@Override
|
||||
public Supplier<Map<Integer, List<SectionRectangle>>> supplier() {
|
||||
|
||||
return HashMap::new;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Function<Map<Integer, List<SectionRectangle>>, Map<Integer, List<SectionRectangle>>> finisher() {
|
||||
|
||||
return Function.identity();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BinaryOperator<Map<Integer, List<SectionRectangle>>> combiner() {
|
||||
|
||||
return SectionGridCreatorService::mergeMapsByConcatenatingLists;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Set<Characteristics> characteristics() {
|
||||
|
||||
return Set.of(Characteristics.IDENTITY_FINISH, Characteristics.CONCURRENT, Characteristics.UNORDERED);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class TableGridCollector extends GridCollector<Table> {
|
||||
|
||||
@Override
|
||||
public BiConsumer<Map<Integer, List<SectionRectangle>>, Table> accumulator() {
|
||||
|
||||
return (map, table) -> table.getPages()
|
||||
.forEach(page -> map.merge(page.getNumber(), List.of(toSectionRectangle(table, page, table.getPages().size())), SectionGridCreatorService::concatLists));
|
||||
}
|
||||
|
||||
|
||||
private static SectionRectangle toSectionRectangle(Table table, Page page, int numberOfParts) {
|
||||
|
||||
Rectangle2D rect = table.getBBox().get(page);
|
||||
List<CellRectangle> tableCellRectangles = table.streamTableCells()
|
||||
.map(TableCell::getBBox)
|
||||
.map(map -> map.get(page))
|
||||
.filter(Objects::nonNull)
|
||||
.map(rectangle2D -> new CellRectangle(new Point((float) rectangle2D.getX(), (float) rectangle2D.getY()),
|
||||
(float) rectangle2D.getWidth(),
|
||||
(float) rectangle2D.getHeight()))
|
||||
.toList();
|
||||
return new SectionRectangle(new Point((float) rect.getX(), (float) rect.getY()),
|
||||
(float) rect.getWidth(),
|
||||
(float) rect.getHeight(),
|
||||
1,
|
||||
numberOfParts,
|
||||
tableCellRectangles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class SectionGridCollector extends GridCollector<Map<Page, Rectangle2D>> {
|
||||
|
||||
@Override
|
||||
public BiConsumer<Map<Integer, List<SectionRectangle>>, Map<Page, Rectangle2D>> accumulator() {
|
||||
|
||||
return (mapToKeep, mapToMerge) -> mapToMerge.forEach((page, rectangle) -> mapToKeep.merge(page.getNumber(),
|
||||
List.of(toSectionRectangle(rectangle, mapToMerge.values().size())),
|
||||
SectionGridCreatorService::concatLists));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static SectionRectangle toSectionRectangle(Rectangle2D rect, int numberOfParts) {
|
||||
|
||||
return new SectionRectangle(new Point((float) rect.getX(), (float) rect.getY()), (float) rect.getWidth(), (float) rect.getHeight(), 1, numberOfParts, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static Map<Integer, List<SectionRectangle>> mergeMapsByConcatenatingLists(Map<Integer, List<SectionRectangle>> mapToKeep,
|
||||
Map<Integer, List<SectionRectangle>> mapToMerge) {
|
||||
|
||||
mapToMerge.forEach((page, rectangle) -> mapToKeep.merge(page, rectangle, SectionGridCreatorService::concatLists));
|
||||
return mapToKeep;
|
||||
}
|
||||
|
||||
|
||||
private static List<SectionRectangle> concatLists(List<SectionRectangle> l1, List<SectionRectangle> l2) {
|
||||
|
||||
return Stream.concat(l1.stream(), l2.stream()).toList();
|
||||
}
|
||||
|
||||
}
|
||||
@ -51,7 +51,6 @@ public abstract class AbstractTest {
|
||||
protected final static String POSITION_FILE_ID = "positions";
|
||||
protected final static String PAGES_FILE_ID = "pages";
|
||||
protected final static String TENANT_ID = "tenant";
|
||||
protected final static String SECTION_GRID_ID = "section";
|
||||
protected final static String VIEWER_DOCUMENT_ID = "viewer";
|
||||
protected final static String SIMPLIFIED_ID = "simplified";
|
||||
|
||||
@ -68,7 +67,6 @@ public abstract class AbstractTest {
|
||||
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||
.pageFileStorageId(PAGES_FILE_ID)
|
||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||
.sectionGridStorageId(SECTION_GRID_ID)
|
||||
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||
.build();
|
||||
}
|
||||
@ -114,7 +112,6 @@ public abstract class AbstractTest {
|
||||
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||
.pageFileStorageId(PAGES_FILE_ID)
|
||||
.simplifiedTextStorageId(SIMPLIFIED_ID)
|
||||
.sectionGridStorageId(SECTION_GRID_ID)
|
||||
.viewerDocumentStorageId(VIEWER_DOCUMENT_ID)
|
||||
.build();
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user