RED-8670: add tables to idp result
* apparently i've fixed some error, where the ocr-service sometimes hangs
This commit is contained in:
parent
9ed9a3c37c
commit
1d1bd321c2
@ -1,25 +0,0 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class AzureAnalyzeResult {
|
||||
|
||||
@Builder.Default
|
||||
List<KeyValuePair> keyValuePairs = new ArrayList<>();
|
||||
@Builder.Default
|
||||
List<TextRegion> handWrittenText = new ArrayList<>();
|
||||
@Builder.Default
|
||||
List<Figure> figures = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -1,10 +1,11 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import lombok.Builder;
|
||||
|
||||
@Builder
|
||||
public record Figure(Optional<TextRegion> caption, Region image) {
|
||||
public record Figure(TextRegion caption, Region image, List<TextRegion> footnotes) {
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public record IdpResult(List<KeyValuePair> keyValuePairs, List<TextRegion> handWrittenText, List<Figure> figures, List<Table> tables) {
|
||||
|
||||
public static IdpResult initSynchronized() {
|
||||
|
||||
return new IdpResult(Collections.synchronizedList(new LinkedList<>()),
|
||||
Collections.synchronizedList(new LinkedList<>()),
|
||||
Collections.synchronizedList(new LinkedList<>()),
|
||||
Collections.synchronizedList(new LinkedList<>()));
|
||||
}
|
||||
|
||||
|
||||
public static IdpResult empty() {
|
||||
|
||||
return new IdpResult(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record Table(TextRegion caption, int numberOfCols, int numberOfRows, List<TableCell> cells, List<TextRegion> footnotes, List<Region> bboxes) {
|
||||
|
||||
}
|
||||
@ -0,0 +1,5 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
public record TableCell(TextRegion textRegion, int row, int col, TableCellType kind) {
|
||||
|
||||
}
|
||||
@ -0,0 +1,5 @@
|
||||
package com.knecon.fforesight.service.ocr.v1.api.model;
|
||||
|
||||
/**
 * Role a {@code TableCell} plays inside its table.
 * <p>
 * The declaration order is kept stable because ordinals may be relied upon elsewhere.
 */
public enum TableCellType {

    /** Header cell of a row. */
    ROW_HEADER,

    /** Header cell of a column. */
    COLUMN_HEADER,

    /** Regular content cell. */
    CONTENT,

    /** Stub head cell. */
    STUB_HEAD,

    /** Cell carrying a description. */
    DESCRIPTION

}
|
||||
@ -21,6 +21,8 @@ public class NativeLibrariesInitializer {
|
||||
@Value("${pdftron.license:}")
|
||||
private String pdftronLicense;
|
||||
|
||||
@Value("${native-libs.path:}")
|
||||
private String nativeLibsPath;
|
||||
|
||||
@SneakyThrows
|
||||
@PostConstruct
|
||||
@ -32,8 +34,8 @@ public class NativeLibrariesInitializer {
|
||||
PDFNet.setTempPath("/tmp/pdftron");
|
||||
PDFNet.initialize(pdftronLicense);
|
||||
|
||||
log.info("Setting jna.library.path: {}", System.getenv("VCPKG_DYNAMIC_LIB"));
|
||||
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
|
||||
log.info("Setting jna.library.path: {}", nativeLibsPath);
|
||||
System.setProperty("jna.library.path", nativeLibsPath);
|
||||
|
||||
log.info("Asserting Native Libraries loaded");
|
||||
|
||||
|
||||
@ -0,0 +1,102 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.azure.ai.documentintelligence.models.DocumentPage;
|
||||
import com.azure.ai.documentintelligence.models.DocumentSpan;
|
||||
import com.azure.ai.documentintelligence.models.DocumentWord;
|
||||
|
||||
public class DocumentSpanLookup {
|
||||
|
||||
List<PageSpanLookup> documentWordLookup;
|
||||
|
||||
|
||||
public DocumentSpanLookup(AnalyzeResult analyzeResult) {
|
||||
|
||||
documentWordLookup = new ArrayList<>(analyzeResult.getPages().size());
|
||||
int offset = 0;
|
||||
for (DocumentPage page : analyzeResult.getPages()) {
|
||||
|
||||
if (page.getWords() == null || page.getWords().isEmpty()) {
|
||||
documentWordLookup.add(new PageSpanLookup(offset, offset, null));
|
||||
}
|
||||
int start = page.getWords()
|
||||
.get(0).getSpan().getOffset();
|
||||
DocumentSpan span = page.getWords()
|
||||
.get(page.getWords().size() - 1).getSpan();
|
||||
int end = span.getOffset() + span.getLength();
|
||||
SpanLookup<DocumentWord> pageWords = new SpanLookup<>(page.getWords()
|
||||
.stream(), DocumentWord::getSpan);
|
||||
documentWordLookup.add(new PageSpanLookup(start, end, pageWords));
|
||||
offset = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<WordOnPage> findWordsOnPages(DocumentSpan documentSpan) {
|
||||
|
||||
if (documentSpan == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
int firstSmallerIdx = findIdxOfFirstSmallerObject(documentSpan);
|
||||
PageSpanLookup firstPage = documentWordLookup.get(firstSmallerIdx);
|
||||
List<WordOnPage> wordsOnPages = new ArrayList<>();
|
||||
for (int pageNumber = firstSmallerIdx; pageNumber < documentWordLookup.size(); pageNumber++) {
|
||||
PageSpanLookup page = documentWordLookup.get(pageNumber);
|
||||
if (page.end >= documentSpan.getOffset()) {
|
||||
break;
|
||||
}
|
||||
firstPage.wordSpanLookup.findElementsContainedInSpan(documentSpan)
|
||||
.stream()
|
||||
.map(documentWord -> new WordOnPage(documentWord, firstSmallerIdx))
|
||||
.forEach(wordsOnPages::add);
|
||||
}
|
||||
return wordsOnPages;
|
||||
}
|
||||
|
||||
|
||||
private int findIdxOfFirstSmallerObject(DocumentSpan documentSpan) {
|
||||
|
||||
int idx = Collections.binarySearch(documentWordLookup, new PageSpanLookup(documentSpan.getOffset(), -1, null), Comparator.comparing(PageSpanLookup::start));
|
||||
|
||||
if (idx >= 0) {
|
||||
return idx;
|
||||
} else {
|
||||
int insertionPoint = -(idx + 1);
|
||||
|
||||
if (insertionPoint == 0) {
|
||||
return -1;
|
||||
}
|
||||
var lastSmaller = documentWordLookup.get(insertionPoint - 1);
|
||||
for (int resultIdx = insertionPoint - 2; resultIdx >= 0; resultIdx--) {
|
||||
if (documentWordLookup.get(resultIdx).compareTo(lastSmaller) == 0) {
|
||||
return resultIdx + 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public record WordOnPage(DocumentWord documentWord, int pageNumber) {
|
||||
|
||||
}
|
||||
|
||||
private record PageSpanLookup(int start, int end, SpanLookup<DocumentWord> wordSpanLookup) implements Comparable<PageSpanLookup> {
|
||||
|
||||
@Override
|
||||
public int compareTo(PageSpanLookup o) {
|
||||
|
||||
return Integer.compare(start, o.start);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -5,6 +5,7 @@ import static com.knecon.fforesight.service.ocr.processor.model.Statistics.human
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
@ -13,6 +14,7 @@ import java.util.Set;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
|
||||
import com.iqser.red.pdftronlogic.commons.OCGWatermarkRemovalService;
|
||||
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
|
||||
@ -45,6 +47,7 @@ public class OCRService {
|
||||
BatchFactory batchFactory;
|
||||
AsyncOcrService asyncOcrService;
|
||||
OcrServiceSettings settings;
|
||||
ObjectMapper mapper;
|
||||
|
||||
|
||||
/**
|
||||
@ -145,6 +148,10 @@ public class OCRService {
|
||||
RotationCorrectionUtility.rotatePages(viewerDocumentFile.toPath(), viewerDocumentFile.toPath(), ocrResult.anglesPerPage());
|
||||
}
|
||||
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
saveAnalyzeResultFile(analyzeResultFile, ocrResult);
|
||||
}
|
||||
|
||||
supervisor.getStatistics().drawingPdfFinished();
|
||||
|
||||
supervisor.sendFinished();
|
||||
@ -154,4 +161,12 @@ public class OCRService {
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void saveAnalyzeResultFile(File analyzeResultFile, OcrResult ocrResult) throws IOException {
|
||||
|
||||
try (var out = new FileOutputStream(analyzeResultFile)) {
|
||||
mapper.writeValue(out, ocrResult.idpResult());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -106,6 +106,7 @@ public class OcrResultPostProcessingPipeline {
|
||||
writableOcrResultList.add(builder.build());
|
||||
|
||||
}
|
||||
log.debug("Batch {}: finished post-processing.", batch.getIndex());
|
||||
return writableOcrResultList;
|
||||
}
|
||||
|
||||
|
||||
@ -90,6 +90,7 @@ public class ImageProcessingService {
|
||||
supervisor.markError(e.getMessage());
|
||||
} finally {
|
||||
supervisor.markPageFinished(processedImage);
|
||||
log.debug("Finished page: {}", processedImage.pageNumber());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,40 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.utils;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
/**
 * Static helpers for normalizing text: line breaks and runs of whitespace are collapsed so that the text
 * ends up on a single line.
 * <p>
 * The class is written as a plain utility class (final, private constructor, explicitly static members)
 * instead of relying on Lombok's {@code @UtilityClass}, because {@link #cleanString(String)} is used via a
 * single static import and members that are only made static by Lombok cannot be imported that way.
 */
public final class StringCleaningUtility {

    /** A hyphen-like character (including the soft hyphen) directly followed by one or more line breaks. */
    public static final Pattern hyphenLineBreaks = Pattern.compile("[-~‐‒⁻−﹣゠⁓‑\\u00AD][\\r\\n]+");

    /** One or more consecutive line break characters. */
    public static final Pattern linebreaks = Pattern.compile("[\\r\\n]+");

    /** Two or more consecutive whitespace characters. */
    public static final Pattern doubleWhitespaces = Pattern.compile("\\s{2,}");


    private StringCleaningUtility() {

        throw new UnsupportedOperationException("This is a utility class and cannot be instantiated");
    }


    /**
     * Cleans a string in three steps: hyphenation at line ends is removed (the hyphen and the line break are
     * dropped, joining both word parts), remaining line breaks become a single space, and runs of whitespace
     * are collapsed into a single space. Leading and trailing whitespace is not trimmed.
     *
     * @param value the text to clean; may be {@code null}
     * @return the cleaned text, or an empty string if {@code value} is {@code null}
     */
    public static String cleanString(String value) {

        if (value == null) {
            return "";
        }
        String noHyphenLinebreaks = removeHyphenLinebreaks(value);
        String noLinebreaks = removeLinebreaks(noHyphenLinebreaks);
        return removeMultipleWhitespaces(noLinebreaks);
    }


    /** Drops every hyphen-like character that is directly followed by line breaks, together with those breaks. */
    private static String removeHyphenLinebreaks(String value) {

        return hyphenLineBreaks.matcher(value).replaceAll("");
    }


    /** Replaces every run of two or more whitespace characters with a single space. */
    private static String removeMultipleWhitespaces(String value) {

        return doubleWhitespaces.matcher(value).replaceAll(" ");
    }


    /** Replaces every run of line break characters with a single space. */
    private static String removeLinebreaks(String value) {

        return linebreaks.matcher(value).replaceAll(" ");
    }

}
|
||||
@ -1,14 +1,14 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureAnalyzeResult;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class AnalyzeResultMapper {
|
||||
|
||||
public AzureAnalyzeResult map(AnalyzeResult analyzeResult) {
|
||||
public IdpResult map(AnalyzeResult analyzeResult) {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -0,0 +1,236 @@
|
||||
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
|
||||
|
||||
import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.azure.ai.documentintelligence.models.BoundingRegion;
|
||||
import com.azure.ai.documentintelligence.models.DocumentCaption;
|
||||
import com.azure.ai.documentintelligence.models.DocumentFigure;
|
||||
import com.azure.ai.documentintelligence.models.DocumentFootnote;
|
||||
import com.azure.ai.documentintelligence.models.DocumentKeyValuePair;
|
||||
import com.azure.ai.documentintelligence.models.DocumentTable;
|
||||
import com.azure.ai.documentintelligence.models.DocumentTableCell;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.DocumentSpanLookup;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.Figure;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.Region;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.Table;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.TableCell;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.TableCellType;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.TextRegion;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Getter
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class IdpResultFactory {
|
||||
|
||||
IdpResult idpResult;
|
||||
Map<Integer, AffineTransform> resultToPageTransforms;
|
||||
Map<Integer, PageInformation> pageInformation;
|
||||
Map<Integer, Double> angles;
|
||||
boolean rotationCorrection;
|
||||
|
||||
|
||||
public IdpResultFactory(Map<Integer, AffineTransform> resultToPageTransforms,
|
||||
Map<Integer, PageInformation> pageInformation,
|
||||
Map<Integer, Double> angles,
|
||||
Set<AzureOcrFeature> features) {
|
||||
|
||||
this.angles = angles;
|
||||
|
||||
this.rotationCorrection = features.contains(AzureOcrFeature.ROTATION_CORRECTION);
|
||||
this.resultToPageTransforms = resultToPageTransforms;
|
||||
this.pageInformation = pageInformation;
|
||||
this.idpResult = IdpResult.initSynchronized();
|
||||
}
|
||||
|
||||
|
||||
public AffineTransform getResultToPageTransform(Integer pageNumber) {
|
||||
|
||||
AffineTransform transform = resultToPageTransforms.get(pageNumber);
|
||||
if (rotationCorrection) {
|
||||
PageInformation page = pageInformation.get(pageNumber);
|
||||
transform.preConcatenate(RotationCorrectionUtility.buildTransform(angles.get(pageNumber), page.width(), page.height(), false));
|
||||
}
|
||||
return transform;
|
||||
}
|
||||
|
||||
|
||||
public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) {
|
||||
|
||||
DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult);
|
||||
analyzeResult.getTables()
|
||||
.forEach(documentTable -> addTable(documentTable, words, batch));
|
||||
analyzeResult.getKeyValuePairs()
|
||||
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch));
|
||||
analyzeResult.getFigures()
|
||||
.forEach(documentFigure -> addFigure(documentFigure, batch, words));
|
||||
}
|
||||
|
||||
|
||||
private void addFigure(DocumentFigure documentFigure, PageBatch batch, DocumentSpanLookup words) {
|
||||
|
||||
List<TextRegion> footNotes = new LinkedList<>();
|
||||
if (documentFigure.getFootnotes() != null) {
|
||||
documentFigure.getFootnotes()
|
||||
.stream()
|
||||
.map(footNote -> toTextRegion(footNote, batch))
|
||||
.filter(Objects::nonNull)
|
||||
.forEach(footNotes::add);
|
||||
}
|
||||
int batchPageNumber = documentFigure.getBoundingRegions()
|
||||
.get(0).getPageNumber();
|
||||
Region bbox = toRegionFromRegions(batch.getPageNumber(batchPageNumber), documentFigure.getBoundingRegions());
|
||||
TextRegion caption = toTextRegion(documentFigure.getCaption(), batch);
|
||||
idpResult.figures().add(new Figure(caption, bbox, footNotes));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addKeyValuePair(DocumentKeyValuePair documentKeyValuePair, PageBatch batch) {
|
||||
|
||||
TextRegion key = null;
|
||||
if (documentKeyValuePair.getKey() != null) {
|
||||
Region region = toRegionFromRegions(batch, documentKeyValuePair.getKey().getBoundingRegions());
|
||||
key = new TextRegion(region, cleanString(documentKeyValuePair.getKey().getContent()));
|
||||
}
|
||||
TextRegion value = null;
|
||||
if (documentKeyValuePair.getValue() != null) {
|
||||
Region region = toRegionFromRegions(batch, documentKeyValuePair.getValue().getBoundingRegions());
|
||||
value = new TextRegion(region, cleanString(documentKeyValuePair.getValue().getContent()));
|
||||
}
|
||||
|
||||
idpResult.keyValuePairs().add(new KeyValuePair(key, value));
|
||||
}
|
||||
|
||||
|
||||
private void addTable(DocumentTable documentTable, DocumentSpanLookup words, PageBatch batch) {
|
||||
|
||||
TextRegion caption = toTextRegion(documentTable.getCaption(), batch);
|
||||
List<TableCell> tableCells = documentTable.getCells()
|
||||
.stream()
|
||||
.map(documentTableCell -> toTableCell(documentTableCell, words, batch))
|
||||
.toList();
|
||||
List<TextRegion> footNotes = new LinkedList<>();
|
||||
|
||||
if (documentTable.getFootnotes() != null) {
|
||||
documentTable.getFootnotes()
|
||||
.stream()
|
||||
.map(footNote -> toTextRegion(footNote, batch))
|
||||
.filter(Objects::nonNull)
|
||||
.forEach(footNotes::add);
|
||||
}
|
||||
List<Region> bbox = documentTable.getBoundingRegions()
|
||||
.stream()
|
||||
.map(b -> toRegion(b, batch))
|
||||
.toList();
|
||||
Table table = new Table(caption, documentTable.getColumnCount(), documentTable.getRowCount(), tableCells, footNotes, bbox);
|
||||
idpResult.tables().add(table);
|
||||
}
|
||||
|
||||
|
||||
private TextRegion toTextRegion(DocumentFootnote footNote, PageBatch batch) {
|
||||
|
||||
if (footNote == null || footNote.getBoundingRegions().isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Region region = toRegionFromRegions(batch, footNote.getBoundingRegions());
|
||||
return new TextRegion(region, cleanString(footNote.getContent()));
|
||||
}
|
||||
|
||||
|
||||
private TextRegion toTextRegion(DocumentCaption caption, PageBatch batch) {
|
||||
|
||||
if (caption == null || caption.getBoundingRegions().isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Region region = toRegionFromRegions(batch, caption.getBoundingRegions());
|
||||
return new TextRegion(region, cleanString(caption.getContent()));
|
||||
}
|
||||
|
||||
|
||||
private TableCell toTableCell(DocumentTableCell documentTableCell, DocumentSpanLookup words, PageBatch batch) {
|
||||
|
||||
int batchPageNumber = documentTableCell.getBoundingRegions()
|
||||
.get(0).getPageNumber();
|
||||
Region region = toRegionFromRegions(batch.getPageNumber(batchPageNumber), documentTableCell.getBoundingRegions());
|
||||
TableCellType kind = mapTableCellType(documentTableCell);
|
||||
return new TableCell(new TextRegion(region, cleanString(documentTableCell.getContent())), documentTableCell.getRowIndex(), documentTableCell.getColumnIndex(), kind);
|
||||
}
|
||||
|
||||
|
||||
private static TableCellType mapTableCellType(DocumentTableCell documentTableCell) {
|
||||
|
||||
if (documentTableCell.getKind() == null) {
|
||||
return TableCellType.CONTENT;
|
||||
}
|
||||
return switch (documentTableCell.getKind().toString()) {
|
||||
case "columnHeader" -> TableCellType.COLUMN_HEADER;
|
||||
case "rowHeader" -> TableCellType.ROW_HEADER;
|
||||
case "description" -> TableCellType.DESCRIPTION;
|
||||
case "stubHead" -> TableCellType.STUB_HEAD;
|
||||
default -> TableCellType.CONTENT;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
private Region toRegion(BoundingRegion boundingRegion, PageBatch batch) {
|
||||
|
||||
int pageNumber = batch.getPageNumber(boundingRegion.getPageNumber());
|
||||
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(getResultToPageTransform(pageNumber));
|
||||
return new Region(pageNumber, qp.toData());
|
||||
}
|
||||
|
||||
|
||||
private Region toRegionFromRegions(int pageNumber, List<BoundingRegion> regions) {
|
||||
|
||||
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
|
||||
.map(BoundingRegion::getPolygon)
|
||||
.map(QuadPoint::fromPolygons)
|
||||
.map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D())
|
||||
.collect(new Rectangle2DBBoxCollector()));
|
||||
|
||||
return new Region(pageNumber, bbox.toData());
|
||||
}
|
||||
|
||||
|
||||
private Region toRegionFromRegions(PageBatch batch, List<BoundingRegion> regions) {
|
||||
|
||||
assert !regions.isEmpty();
|
||||
int batchPageNumber = regions.get(0).getPageNumber();
|
||||
if (!regions.stream()
|
||||
.map(BoundingRegion::getPageNumber)
|
||||
.allMatch(number -> number == batchPageNumber)) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
int pageNumber = batch.getPageNumber(batchPageNumber);
|
||||
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
|
||||
.map(BoundingRegion::getPolygon)
|
||||
.map(QuadPoint::fromPolygons)
|
||||
.map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D())
|
||||
.collect(new Rectangle2DBBoxCollector()));
|
||||
|
||||
return new Region(pageNumber, bbox.toData());
|
||||
}
|
||||
|
||||
}
|
||||
@ -8,19 +8,22 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.azure.ai.documentintelligence.models.AnalyzeResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
|
||||
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.OcrExecutionSupervisor;
|
||||
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.OcrResultPostProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
|
||||
import com.knecon.fforesight.service.ocr.processor.service.OcrResultPostProcessingPipeline;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class LayerFactory {
|
||||
|
||||
@ -29,6 +32,7 @@ public class LayerFactory {
|
||||
IdpLayerFactory idpLayerFactory;
|
||||
OcrDebugLayerFactory ocrDebugLayerFactory;
|
||||
OcrTextLayerFactory ocrTextLayerFactory;
|
||||
IdpResultFactory idpResultFactory;
|
||||
OcrServiceSettings settings;
|
||||
Set<AzureOcrFeature> features;
|
||||
Map<Integer, Double> angles;
|
||||
@ -48,13 +52,13 @@ public class LayerFactory {
|
||||
this.features = features;
|
||||
this.supervisor = supervisor;
|
||||
this.angles = Collections.synchronizedMap(new HashMap<>());
|
||||
this.idpResultFactory = new IdpResultFactory(ocrResultPostProcessingPipeline.getResultToPageTransforms(), pageInformation, angles, features);
|
||||
}
|
||||
|
||||
|
||||
public void processAnalyzeResult(PageBatch batch, AnalyzeResult analyzeResult) throws InterruptedException {
|
||||
|
||||
List<WritableOcrResult> results = ocrResultPostProcessingPipeline.processAnalyzeResult(analyzeResult, batch);
|
||||
|
||||
results.forEach(result -> angles.put(result.getPageNumber(), result.getAngle()));
|
||||
|
||||
ocrTextLayerFactory.addWritableOcrResult(results);
|
||||
@ -63,7 +67,10 @@ public class LayerFactory {
|
||||
ocrDebugLayerFactory.addAnalysisResult(results);
|
||||
}
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
log.info("Batch {}: Start building IDP stuff", batch.getIndex());
|
||||
idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
|
||||
idpResultFactory.addAnalyzeResult(analyzeResult, batch);
|
||||
log.info("Batch {}: Finished building IDP stuff", batch.getIndex());
|
||||
}
|
||||
|
||||
this.supervisor.finishMappingResult(batch);
|
||||
@ -82,7 +89,8 @@ public class LayerFactory {
|
||||
if (features.contains(AzureOcrFeature.IDP)) {
|
||||
debugLayers.add(idpLayerFactory.getIdpLayer());
|
||||
}
|
||||
return new OcrResult(List.of(ocrTextLayer), debugLayers, angles);
|
||||
IdpResult idpResult = features.contains(AzureOcrFeature.IDP) ? idpResultFactory.getIdpResult() : null;
|
||||
return new OcrResult(List.of(ocrTextLayer), debugLayers, angles, idpResult);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -3,8 +3,9 @@ package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
|
||||
|
||||
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage) {
|
||||
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage, IdpResult idpResult) {
|
||||
|
||||
}
|
||||
@ -133,22 +133,32 @@ public class RotationCorrectionUtility {
|
||||
|
||||
public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight) {
|
||||
|
||||
return buildTransform(angle, originalWidth, originalHeight, true);
|
||||
}
|
||||
|
||||
|
||||
public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight, boolean quadrantRotation) {
|
||||
|
||||
int quadrants = getQuadrantRotation(angle);
|
||||
|
||||
double h = originalHeight;
|
||||
double w = originalWidth;
|
||||
|
||||
if (quadrants == 1 || quadrants == 3) {
|
||||
w = originalHeight;
|
||||
h = originalWidth;
|
||||
}
|
||||
AffineTransform quadrantRotationTransform = new AffineTransform();
|
||||
if (quadrantRotation) {
|
||||
|
||||
AffineTransform quadrantRotation = switch (quadrants) {
|
||||
case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
|
||||
case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
|
||||
case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
|
||||
default -> new AffineTransform();
|
||||
};
|
||||
if (quadrants == 1 || quadrants == 3) {
|
||||
w = originalHeight;
|
||||
h = originalWidth;
|
||||
}
|
||||
|
||||
quadrantRotationTransform = switch (quadrants) {
|
||||
case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
|
||||
case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
|
||||
case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
|
||||
default -> new AffineTransform();
|
||||
};
|
||||
}
|
||||
|
||||
double remainder = getRemainingAngle(angle, quadrants);
|
||||
double scale = getScalingFactor(remainder, w, h);
|
||||
@ -158,7 +168,7 @@ public class RotationCorrectionUtility {
|
||||
transform.rotate(Math.toRadians(remainder));
|
||||
transform.scale(scale, scale);
|
||||
transform.translate(-w / 2, -h / 2);
|
||||
transform.concatenate(quadrantRotation);
|
||||
transform.concatenate(quadrantRotationTransform);
|
||||
|
||||
return transform;
|
||||
}
|
||||
|
||||
@ -33,20 +33,23 @@ public class FileStorageService {
|
||||
public void storeFiles(DocumentRequest request, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
|
||||
|
||||
try (var in = new FileInputStream(viewerDocumentFile)) {
|
||||
if (request.optionalViewerDocumentId().isPresent()) {
|
||||
if (request.optionalViewerDocumentId()
|
||||
.isPresent()) {
|
||||
storageService.storeObject(TenantContext.getTenantId(), request.getViewerDocId(), in);
|
||||
} else {
|
||||
storageService.storeObject(TenantContext.getTenantId(), getStorageId(request.getDossierId(), request.getFileId(), FileType.VIEWER_DOCUMENT), in);
|
||||
}
|
||||
}
|
||||
try (var in = new FileInputStream(documentFile)) {
|
||||
if (request.optionalOriginDocumentId().isPresent()) {
|
||||
if (request.optionalOriginDocumentId()
|
||||
.isPresent()) {
|
||||
storageService.storeObject(TenantContext.getTenantId(), request.getOriginDocumentId(), in);
|
||||
} else {
|
||||
storageService.storeObject(TenantContext.getTenantId(), getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN), in);
|
||||
}
|
||||
}
|
||||
if (request.optionalIdpResultId().isPresent()) {
|
||||
if (request.optionalIdpResultId()
|
||||
.isPresent() && analyzeResultFile.exists()) {
|
||||
try (var in = new FileInputStream(analyzeResultFile)) {
|
||||
storageService.storeObject(TenantContext.getTenantId(), request.getIdpResultId(), in);
|
||||
}
|
||||
@ -59,7 +62,8 @@ public class FileStorageService {
|
||||
|
||||
Files.createDirectories(documentFile.getParentFile().toPath());
|
||||
|
||||
String originDocumentId = request.optionalOriginDocumentId().orElse(getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN));
|
||||
String originDocumentId = request.optionalOriginDocumentId()
|
||||
.orElse(getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN));
|
||||
|
||||
storageService.downloadTo(TenantContext.getTenantId(), originDocumentId, documentFile);
|
||||
|
||||
|
||||
@ -63,3 +63,5 @@ azure:
|
||||
|
||||
ocrService:
|
||||
sendStatusUpdates: true
|
||||
|
||||
native-libs.path: ${VCPKG_DYNAMIC_LIB}
|
||||
@ -26,11 +26,11 @@ import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
// in order to run, the azure.key must be set first in the application.yml and you must set the env variable VCPKG_DYNAMIC_LIB to your tesseract and leptonica installation folder
|
||||
@Disabled
|
||||
//@Disabled
|
||||
@SpringBootTest()
|
||||
public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
|
||||
public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION);
|
||||
public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION, AzureOcrFeature.IDP);
|
||||
@Autowired
|
||||
private OCRService ocrService;
|
||||
|
||||
@ -55,7 +55,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
@SneakyThrows
|
||||
public void testOcrWithFile() {
|
||||
|
||||
testOCR("/home/kschuettler/Dokumente/LayoutparsingEvaluation/RAW_FILES/Difficult Headlines/VV-284053.pdf/VV-284053.pdf.ORIGIN.pdf");
|
||||
testOCR("/home/kschuettler/Dokumente/402Study.pdf");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -2,8 +2,10 @@ persistence-service.url: "http://persistence-service-v1:8080"
|
||||
|
||||
pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a
|
||||
azure:
|
||||
endpoint: https://ff-ocr-test.cognitiveservices.azure.com/
|
||||
key: # find key in Bitwarden under: Azure IDP Test Key
|
||||
endpoint: https://ff-ocr-dev.cognitiveservices.azure.com/
|
||||
key: # find key in Bitwarden under: Azure IDP Test Key (set locally, never commit the key itself)
|
||||
native-libs:
|
||||
|
||||
|
||||
logging.type: ${LOGGING_TYPE:CONSOLE}
|
||||
|
||||
@ -19,4 +21,5 @@ management:
|
||||
endpoints.web.exposure.include: prometheus, health, metrics
|
||||
metrics.export.prometheus.enabled: true
|
||||
|
||||
POD_NAME: azure-ocr-service
|
||||
POD_NAME: azure-ocr-service
|
||||
native-libs.path: /home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user