akra-certificate: change column detector slightly
* introduce settings
This commit is contained in:
parent
8e7bed8b52
commit
81231ae486
@ -96,6 +96,7 @@ public class LayoutParsingPipeline {
|
|||||||
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||||
ClarifyndClassificationService clarifyndClassificationService;
|
ClarifyndClassificationService clarifyndClassificationService;
|
||||||
GraphicExtractorService graphicExtractorService;
|
GraphicExtractorService graphicExtractorService;
|
||||||
|
LayoutparserSettings settings;
|
||||||
|
|
||||||
|
|
||||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||||
@ -104,24 +105,32 @@ public class LayoutParsingPipeline {
|
|||||||
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
|
||||||
|
.orElse(originFile);
|
||||||
|
|
||||||
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
||||||
if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
|
if (layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
|
.isPresent()) {
|
||||||
|
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||||
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
|
if (layoutParsingRequest.imagesFileStorageId()
|
||||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
.isPresent()) {
|
||||||
|
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
TableServiceResponse tableServiceResponse = new TableServiceResponse();
|
||||||
if (layoutParsingRequest.tablesFileStorageId().isPresent()) {
|
if (layoutParsingRequest.tablesFileStorageId()
|
||||||
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId().get());
|
.isPresent()) {
|
||||||
|
tableServiceResponse = layoutParsingStorageService.getTablesFile(layoutParsingRequest.tablesFileStorageId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = parseLayout(LayoutParsingType.CLARIFYND_PARAGRAPH_DEBUG,
|
ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
|
||||||
|
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
|
||||||
originFile,
|
originFile,
|
||||||
imageServiceResponse,
|
imageServiceResponse,
|
||||||
tableServiceResponse,
|
tableServiceResponse,
|
||||||
@ -130,11 +139,17 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
Document documentGraph = observeBuildDocumentGraph(LayoutParsingType.CLARIFYND_PARAGRAPH_DEBUG, classificationDocument);
|
Document documentGraph = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
|
||||||
|
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
|
||||||
|
|
||||||
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false, layoutParsingRequest.visualLayoutParsingFileId().isPresent());
|
layoutGridService.addLayoutGrid(viewerDocumentFile,
|
||||||
|
documentGraph,
|
||||||
|
viewerDocumentFile,
|
||||||
|
false,
|
||||||
|
layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
|
.isPresent());
|
||||||
|
|
||||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
@ -224,7 +239,9 @@ public class LayoutParsingPipeline {
|
|||||||
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||||
|
|
||||||
classificationDocument.getVisualizations().setActive(identifier.containsKey("debug"));
|
if (settings.isDebug() || identifier.containsKey("debug")) {
|
||||||
|
classificationDocument.getVisualizations().setActive(true);
|
||||||
|
}
|
||||||
|
|
||||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,20 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.processor;
|
||||||
|
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
|
||||||
|
/**
 * Settings holder for the layout parser, bound from configuration properties
 * under the {@code layoutparser} prefix (see {@code @ConfigurationProperties}).
 * <p>
 * Accessors are generated by Lombok's {@code @Data}; {@code LayoutParsingPipeline}
 * reads them via {@code isDebug()} and {@code getLayoutParsingTypeOverride()}.
 * {@code @FieldDefaults(level = AccessLevel.PRIVATE)} makes the fields below private
 * even though they carry no explicit modifier.
 */
@Data
|
||||||
|
@Configuration
|
||||||
|
@ConfigurationProperties("layoutparser")
|
||||||
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
|
public class LayoutparserSettings {
|
||||||
|
|
||||||
|
// When true, LayoutParsingPipeline activates the classification document's
// visualizations even if the request identifier has no "debug" key.
// Stays false (Java default for a primitive boolean) unless a value is bound.
boolean debug;
|
||||||
|
// Optional override: when non-null, LayoutParsingPipeline passes this type to
// parseLayout and observeBuildDocumentGraph instead of the request's
// layoutParsingType(); when null, the request's own type is used.
LayoutParsingType layoutParsingTypeOverride;
|
||||||
|
}
|
||||||
@ -16,6 +16,7 @@ public class ColumnDetector {
|
|||||||
final static int globalEndIdx = bins_num; // i chose 7, since thirds seems a likely split for columns, therefore divided by 6 would eliminate those.
|
final static int globalEndIdx = bins_num; // i chose 7, since thirds seems a likely split for columns, therefore divided by 6 would eliminate those.
|
||||||
public static final double DERIVATIVE_ZERO_THRESHOLD = 1e-10;
|
public static final double DERIVATIVE_ZERO_THRESHOLD = 1e-10;
|
||||||
public static final double MINIMUM_THRESHOLD_FOR_COLUMNS = 0.05;
|
public static final double MINIMUM_THRESHOLD_FOR_COLUMNS = 0.05;
|
||||||
|
public static final double NEAR_GLOBAL_THRESHOLD = 0.5;
|
||||||
double minY;
|
double minY;
|
||||||
double maxY;
|
double maxY;
|
||||||
double midY;
|
double midY;
|
||||||
@ -237,10 +238,10 @@ public class ColumnDetector {
|
|||||||
List<Integer> nearGlobalDvMaximaIdx = new LinkedList<>();
|
List<Integer> nearGlobalDvMaximaIdx = new LinkedList<>();
|
||||||
List<Integer> nearGlobalDvMinimaIdx = new LinkedList<>();
|
List<Integer> nearGlobalDvMinimaIdx = new LinkedList<>();
|
||||||
for (int i = globalStartIdx; i < globalEndIdx; i++) {
|
for (int i = globalStartIdx; i < globalEndIdx; i++) {
|
||||||
if (derivative[i] <= minDvValue * 0.8) {
|
if (derivative[i] <= minDvValue * NEAR_GLOBAL_THRESHOLD) {
|
||||||
nearGlobalDvMinimaIdx.add(i);
|
nearGlobalDvMinimaIdx.add(i);
|
||||||
}
|
}
|
||||||
if (derivative[i] >= maxDvValue * 0.8) {
|
if (derivative[i] >= maxDvValue * NEAR_GLOBAL_THRESHOLD) {
|
||||||
nearGlobalDvMaximaIdx.add(i);
|
nearGlobalDvMaximaIdx.add(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -68,7 +68,7 @@ public class LayoutGridService {
|
|||||||
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue, boolean writeVisualLayoutParsingGrid) {
|
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue, boolean writeVisualLayoutParsingGrid) {
|
||||||
|
|
||||||
List<Visualizations> allVisualizations;
|
List<Visualizations> allVisualizations;
|
||||||
Visualizations layoutGrid = this.addLayoutGrid(document, true, false);
|
Visualizations layoutGrid = this.addLayoutGrid(document, layerVisibilityDefaultValue, false);
|
||||||
if (writeVisualLayoutParsingGrid) {
|
if (writeVisualLayoutParsingGrid) {
|
||||||
Visualizations visualLayoutGrid = this.addLayoutGrid(document, layerVisibilityDefaultValue, true);
|
Visualizations visualLayoutGrid = this.addLayoutGrid(document, layerVisibilityDefaultValue, true);
|
||||||
allVisualizations = Stream.concat(Stream.of(layoutGrid, visualLayoutGrid), document.getVisualizations().streamAll())
|
allVisualizations = Stream.concat(Stream.of(layoutGrid, visualLayoutGrid), document.getVisualizations().streamAll())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user