RED-5028: Integrated cv table service
This commit is contained in:
parent
97209a3508
commit
f6bc49d42c
@ -12,7 +12,7 @@
|
|||||||
<artifactId>redaction-service-api-v1</artifactId>
|
<artifactId>redaction-service-api-v1</artifactId>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<persistence-service.version>1.254.0</persistence-service.version>
|
<persistence-service.version>1.299.0</persistence-service.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@ -2,6 +2,13 @@ package com.iqser.red.service.redaction.v1.server.classification.service;
|
|||||||
|
|
||||||
import static java.util.stream.Collectors.toSet;
|
import static java.util.stream.Collectors.toSet;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
@ -15,13 +22,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle
|
|||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public class BlockificationService {
|
public class BlockificationService {
|
||||||
@ -48,10 +48,12 @@ public class BlockificationService {
|
|||||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||||
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
||||||
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
||||||
boolean splittedByRuling = word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || word
|
boolean splittedByRuling =
|
||||||
.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) || word
|
isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) ||
|
||||||
.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || word
|
isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||||
.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
|
||||||
|
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||||
|
|| isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
||||||
|
|
||||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||||
|
|
||||||
|
|||||||
@ -48,7 +48,7 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
try {
|
try {
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||||
|
|
||||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
@ -74,7 +74,7 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
try {
|
try {
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||||
|
|
||||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
@ -101,7 +101,7 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,17 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PdfTable {
|
||||||
|
|
||||||
|
@NonNull
|
||||||
|
private List<PdfTableCell> tableCells = new ArrayList<>();
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,21 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@AllArgsConstructor
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PdfTableCell {
|
||||||
|
|
||||||
|
private float x0;
|
||||||
|
private float y0;
|
||||||
|
private float x1;
|
||||||
|
private float y1;
|
||||||
|
private float width;
|
||||||
|
private float height;
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||||
|
|
||||||
|
import com.dslplatform.json.CompiledJson;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@CompiledJson
|
||||||
|
public class PageInfo {
|
||||||
|
|
||||||
|
private int number;
|
||||||
|
private int rotation;
|
||||||
|
private float width;
|
||||||
|
private float height;
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,18 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||||
|
|
||||||
|
import com.dslplatform.json.CompiledJson;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@CompiledJson
|
||||||
|
public class TableCells {
|
||||||
|
|
||||||
|
private float x0;
|
||||||
|
private float y0;
|
||||||
|
private float x1;
|
||||||
|
private float y1;
|
||||||
|
private float width;
|
||||||
|
private float height;
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.dslplatform.json.CompiledJson;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@CompiledJson
|
||||||
|
public class TableData {
|
||||||
|
|
||||||
|
private PageInfo pageInfo;
|
||||||
|
private List<TableCells> tableCells = new ArrayList<>();
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,21 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.dslplatform.json.CompiledJson;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@CompiledJson
|
||||||
|
public class TableServiceResponse {
|
||||||
|
private String dossierId;
|
||||||
|
private String fileId;
|
||||||
|
private String operation;
|
||||||
|
private String targetFileExtension;
|
||||||
|
private String responseFileExtension;
|
||||||
|
|
||||||
|
private List<TableData> data = new ArrayList<>();
|
||||||
|
|
||||||
|
}
|
||||||
@ -96,7 +96,8 @@ public class AnalyzeService {
|
|||||||
if (redactionServiceSettings.isEnableImageClassification()) {
|
if (redactionServiceSettings.isEnableImageClassification()) {
|
||||||
pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||||
}
|
}
|
||||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, pdfImages);
|
|
||||||
|
classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages);
|
||||||
pageCount = classifiedDoc.getPages().size();
|
pageCount = classifiedDoc.getPages().size();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
|
|||||||
@ -9,6 +9,7 @@ import java.nio.file.attribute.FileAttribute;
|
|||||||
import java.nio.file.attribute.PosixFilePermission;
|
import java.nio.file.attribute.PosixFilePermission;
|
||||||
import java.nio.file.attribute.PosixFilePermissions;
|
import java.nio.file.attribute.PosixFilePermissions;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@ -29,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.service.Classifi
|
|||||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||||
@ -42,15 +45,17 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class PdfSegmentationService {
|
public class PdfSegmentationService {
|
||||||
|
|
||||||
|
private final RedactionServiceSettings redactionServiceSettings;
|
||||||
private final RulingCleaningService rulingCleaningService;
|
private final RulingCleaningService rulingCleaningService;
|
||||||
private final TableExtractionService tableExtractionService;
|
private final TableExtractionService tableExtractionService;
|
||||||
private final BlockificationService blockificationService;
|
private final BlockificationService blockificationService;
|
||||||
private final ClassificationService classificationService;
|
private final ClassificationService classificationService;
|
||||||
private final SectionsBuilderService sectionsBuilderService;
|
private final SectionsBuilderService sectionsBuilderService;
|
||||||
private final ImageService imageService;
|
private final ImageService imageService;
|
||||||
|
private final TableService tableService;
|
||||||
|
|
||||||
|
|
||||||
public Document parseDocument(InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
|
public Document parseDocument(String dossierId, String fileId, InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
|
||||||
|
|
||||||
PDDocument pdDocument = null;
|
PDDocument pdDocument = null;
|
||||||
try {
|
try {
|
||||||
@ -67,6 +72,11 @@ public class PdfSegmentationService {
|
|||||||
tempFile.setExecutable(true, true);
|
tempFile.setExecutable(true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Map<Integer, List<PdfTableCell>> pdfTableCells = new HashMap<>();
|
||||||
|
if (redactionServiceSettings.isCvServiceEnabled()) {
|
||||||
|
pdfTableCells = tableService.convertTables(dossierId, fileId);
|
||||||
|
}
|
||||||
|
|
||||||
try (var fos = new FileOutputStream(tempFile)) {
|
try (var fos = new FileOutputStream(tempFile)) {
|
||||||
IOUtils.copy(documentInputStream, fos);
|
IOUtils.copy(documentInputStream, fos);
|
||||||
|
|
||||||
@ -94,12 +104,8 @@ public class PdfSegmentationService {
|
|||||||
int rotation = pdPage.getRotation();
|
int rotation = pdPage.getRotation();
|
||||||
boolean isRotated = rotation != 0 && rotation != 360;
|
boolean isRotated = rotation != 0 && rotation != 360;
|
||||||
|
|
||||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper
|
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings(), stripper.getMinCharWidth(), stripper.getMaxCharHeight());
|
||||||
.getMaxCharHeight());
|
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||||
|
|
||||||
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
|
|
||||||
.getVertical());
|
|
||||||
|
|
||||||
|
|
||||||
PDRectangle cropbox = pdPage.getCropBox();
|
PDRectangle cropbox = pdPage.getCropBox();
|
||||||
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
|
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
|
||||||
@ -109,7 +115,6 @@ public class PdfSegmentationService {
|
|||||||
page.setLandscape(isLandscape || isRotated);
|
page.setLandscape(isLandscape || isRotated);
|
||||||
page.setPageNumber(pageNumber);
|
page.setPageNumber(pageNumber);
|
||||||
|
|
||||||
|
|
||||||
tableExtractionService.extractTables(cleanRulings, page);
|
tableExtractionService.extractTables(cleanRulings, page);
|
||||||
buildPageStatistics(page);
|
buildPageStatistics(page);
|
||||||
increaseDocumentStatistics(page, document);
|
increaseDocumentStatistics(page, document);
|
||||||
@ -128,7 +133,6 @@ public class PdfSegmentationService {
|
|||||||
sectionsBuilderService.buildSections(document);
|
sectionsBuilderService.buildSections(document);
|
||||||
sectionsBuilderService.addImagesToSections(document);
|
sectionsBuilderService.addImagesToSections(document);
|
||||||
|
|
||||||
|
|
||||||
IOUtils.close(pdDocument);
|
IOUtils.close(pdDocument);
|
||||||
|
|
||||||
if (!tempFile.delete()) {
|
if (!tempFile.delete()) {
|
||||||
|
|||||||
@ -0,0 +1,54 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableCells;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableServiceResponse;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class TableService {
|
||||||
|
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final RedactionStorageService redactionStorageService;
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public Map<Integer, List<PdfTableCell>> convertTables(String dossierId, String fileId) {
|
||||||
|
|
||||||
|
var tableClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES));
|
||||||
|
|
||||||
|
TableServiceResponse tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
|
||||||
|
|
||||||
|
Map<Integer, List<PdfTableCell>> tableCells = new HashMap<>();
|
||||||
|
tableServiceResponse.getData().forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>()).addAll(convertTableCells(tableData.getTableCells())));
|
||||||
|
|
||||||
|
return tableCells;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Collection<? extends PdfTableCell> convertTableCells(List<TableCells> tableCells) {
|
||||||
|
|
||||||
|
List<PdfTableCell> pdfTableCells = new ArrayList<>();
|
||||||
|
|
||||||
|
tableCells.forEach(t -> pdfTableCells.add(PdfTableCell.builder().y0(t.getY0()).x1(t.getX1()).y1(t.getY1()).x0(t.getX0()).width(t.getWidth()).height(t.getHeight()).build()));
|
||||||
|
|
||||||
|
return pdfTableCells;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -14,6 +14,8 @@ public class RedactionServiceSettings {
|
|||||||
|
|
||||||
private boolean enableImageClassification = true;
|
private boolean enableImageClassification = true;
|
||||||
|
|
||||||
|
private boolean cvServiceEnabled = true;
|
||||||
|
|
||||||
private float maxImageCropboxRatio = 0.9f;
|
private float maxImageCropboxRatio = 0.9f;
|
||||||
|
|
||||||
private int analysisVersion = 1;
|
private int analysisVersion = 1;
|
||||||
|
|||||||
@ -1,13 +1,22 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class Table extends AbstractTextContainer {
|
public class Table extends AbstractTextContainer {
|
||||||
@ -160,7 +169,7 @@ public class Table extends AbstractTextContainer {
|
|||||||
for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
|
for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
|
||||||
List<Cell> lastRow = new ArrayList<>();
|
List<Cell> lastRow = new ArrayList<>();
|
||||||
for (int j = 0; j < unrotatedRowCount; j++) { // cols
|
for (int j = 0; j < unrotatedRowCount; j++) { // cols
|
||||||
Cell cell = cells.get(new CellPosition(i, j));
|
Cell cell = cells.get(new CellPosition(j, i));
|
||||||
if (cell != null) {
|
if (cell != null) {
|
||||||
lastRow.add(cell);
|
lastRow.add(cell);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,18 +1,34 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import java.awt.geom.Line2D;
|
import java.awt.geom.Line2D;
|
||||||
import java.awt.geom.Point2D;
|
import java.awt.geom.Point2D;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class RulingCleaningService {
|
public class RulingCleaningService {
|
||||||
|
|
||||||
public CleanRulings getCleanRulings(List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
|
private final RedactionServiceSettings redactionServiceSettings;
|
||||||
|
|
||||||
|
public CleanRulings getCleanRulings(List<PdfTableCell> pdfTableCells, List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
|
||||||
|
|
||||||
if (!rulings.isEmpty()) {
|
if (!rulings.isEmpty()) {
|
||||||
snapPoints(rulings, minCharWidth, maxCharHeight);
|
snapPoints(rulings, minCharWidth, maxCharHeight);
|
||||||
@ -24,6 +40,9 @@ public class RulingCleaningService {
|
|||||||
vrs.add(vr);
|
vrs.add(vr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (vrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
|
||||||
|
vrs.addAll(extractVerticalRulings(pdfTableCells));
|
||||||
|
}
|
||||||
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
|
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
|
||||||
|
|
||||||
List<Ruling> hrs = new ArrayList<>();
|
List<Ruling> hrs = new ArrayList<>();
|
||||||
@ -32,6 +51,9 @@ public class RulingCleaningService {
|
|||||||
hrs.add(hr);
|
hrs.add(hr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (hrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
|
||||||
|
hrs.addAll(extractHorizontalRulings(pdfTableCells));
|
||||||
|
}
|
||||||
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
||||||
|
|
||||||
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
|
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
|
||||||
@ -113,6 +135,60 @@ public class RulingCleaningService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Collection<? extends Ruling> extractVerticalRulings(List<PdfTableCell> pdfTableCells) {
|
||||||
|
|
||||||
|
List<Ruling> vrs = new ArrayList<>();
|
||||||
|
|
||||||
|
if (pdfTableCells != null) {
|
||||||
|
for (PdfTableCell pdfTableCell : pdfTableCells) {
|
||||||
|
Ruling leftLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX0(), pdfTableCell.getY0(), pdfTableCell.getY1());
|
||||||
|
Ruling rightLine = createRuling(pdfTableCell.getX1(), pdfTableCell.getX1(), pdfTableCell.getY0(), pdfTableCell.getY1());
|
||||||
|
vrs.add(leftLine);
|
||||||
|
vrs.add(rightLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return vrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Collection<? extends Ruling> extractHorizontalRulings(List<PdfTableCell> pdfTableCells) {
|
||||||
|
|
||||||
|
List<Ruling> hrs = new ArrayList<>();
|
||||||
|
|
||||||
|
if (pdfTableCells != null) {
|
||||||
|
for (PdfTableCell pdfTableCell : pdfTableCells) {
|
||||||
|
Ruling topLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX1(), pdfTableCell.getY1(), pdfTableCell.getY1());
|
||||||
|
Ruling baseLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX1(), pdfTableCell.getY0(), pdfTableCell.getY0());
|
||||||
|
hrs.add(topLine);
|
||||||
|
hrs.add(baseLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return hrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Ruling createRuling(float tableCellX0, float tableCellX1, float tableCellY0, float tableCellY1) {
|
||||||
|
|
||||||
|
float x0 = tableCellX0;
|
||||||
|
float x1 = tableCellX1;
|
||||||
|
float y0 = tableCellY0;
|
||||||
|
float y1 = tableCellY1;
|
||||||
|
|
||||||
|
if (x1 < x0) {
|
||||||
|
x0 = tableCellX1;
|
||||||
|
x1 = tableCellX0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (y1 < y0) {
|
||||||
|
y0 = tableCellY1;
|
||||||
|
y1 = tableCellY0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x1, y1));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
|
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
|
||||||
|
|
||||||
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
|
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
|
||||||
|
|||||||
@ -1,36 +1,29 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server;
|
package com.iqser.red.service.redaction.v1.server;
|
||||||
|
|
||||||
import com.amazonaws.services.s3.AmazonS3;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import static org.mockito.Mockito.when;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
import java.io.BufferedReader;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
|
import java.io.ByteArrayInputStream;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
import java.io.File;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
import java.io.FileOutputStream;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
|
import java.io.IOException;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
import java.io.InputStream;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
import java.io.InputStreamReader;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
import java.net.URL;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
import java.nio.charset.StandardCharsets;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
import java.time.OffsetDateTime;
|
||||||
import com.iqser.red.service.redaction.v1.model.*;
|
import java.time.ZoneOffset;
|
||||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
import java.util.ArrayList;
|
||||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
import java.util.Collections;
|
||||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
import java.util.HashMap;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
import java.util.HashSet;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
import java.util.List;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
import java.util.Map;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
import java.util.Set;
|
||||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
import java.util.UUID;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
import java.util.stream.Collectors;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
|
||||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
|
||||||
import com.iqser.red.storage.commons.service.StorageService;
|
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Ignore;
|
import org.junit.Ignore;
|
||||||
@ -55,16 +48,49 @@ import org.springframework.context.annotation.Primary;
|
|||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit4.SpringRunner;
|
import org.springframework.test.context.junit4.SpringRunner;
|
||||||
|
|
||||||
import java.io.*;
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
import java.net.URL;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import java.nio.charset.StandardCharsets;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import java.time.OffsetDateTime;
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||||
import java.time.ZoneOffset;
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
|
||||||
import java.util.*;
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||||
import java.util.stream.Collectors;
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
|
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||||
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import lombok.SneakyThrows;
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
@RunWith(SpringRunner.class)
|
@RunWith(SpringRunner.class)
|
||||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||||
@ -268,9 +294,8 @@ public class RedactionIntegrationTest {
|
|||||||
public void testMergedImages() throws IOException {
|
public void testMergedImages() throws IOException {
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf");
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
|
|
||||||
@ -313,8 +338,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
// F. Lastname, J. Doe, M. Mustermann
|
// F. Lastname, J. Doe, M. Mustermann
|
||||||
// Lastname M., Doe J., Mustermann M.
|
// Lastname M., Doe J., Mustermann M.
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/ExpansionTest.pdf");
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf");
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -334,8 +358,7 @@ public class RedactionIntegrationTest {
|
|||||||
@Test
|
@Test
|
||||||
public void titleExtraction() throws IOException {
|
public void titleExtraction() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
AnalyzeRequest request = prepareStorage("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -367,8 +390,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("testIgnoreHint");
|
System.out.println("testIgnoreHint");
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/test-ignore-hint.pdf");
|
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
analyzeService.analyze(request);
|
analyzeService.analyze(request);
|
||||||
@ -426,7 +448,7 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
for (File path : input) {
|
for (File path : input) {
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
|
AnalyzeRequest request = prepareStorage(path.getPath());
|
||||||
System.out.println("Redacting file : " + path.getName());
|
System.out.println("Redacting file : " + path.getName());
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
|
|
||||||
@ -471,8 +493,7 @@ public class RedactionIntegrationTest {
|
|||||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
AnalyzeRequest request = prepareStorage(fileName);
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
request.setExcludedPages(Set.of(1));
|
request.setExcludedPages(Set.of(1));
|
||||||
|
|
||||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||||
@ -582,8 +603,7 @@ public class RedactionIntegrationTest {
|
|||||||
String fileName = "files/new/test1S1T1.pdf";
|
String fileName = "files/new/test1S1T1.pdf";
|
||||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
AnalyzeRequest request = prepareStorage(fileName);
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
analyzeService.analyze(request);
|
analyzeService.analyze(request);
|
||||||
@ -637,8 +657,7 @@ public class RedactionIntegrationTest {
|
|||||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
AnalyzeRequest request = prepareStorage(fileName);
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
request.setExcludedPages(Set.of(1));
|
request.setExcludedPages(Set.of(1));
|
||||||
|
|
||||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||||
@ -836,6 +855,60 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTableRedactionWithCvTableService() throws IOException {
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
|
String fileName = "files/new/VV-511309.pdf";
|
||||||
|
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||||
|
|
||||||
|
AnalyzeRequest request = prepareStorage(fileName, tableServiceResponseFile);
|
||||||
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
|
|
||||||
|
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||||
|
.dossierId(TEST_DOSSIER_ID)
|
||||||
|
.fileId(TEST_FILE_ID)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||||
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
|
}
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
|
System.out.println("duration: " + (end - start));
|
||||||
|
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTableRedactionWithOcrAndCvTableService() throws IOException {
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
|
String fileName = "files/new/VV-511309_OCR.pdf";
|
||||||
|
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||||
|
|
||||||
|
AnalyzeRequest request = prepareStorage(fileName, tableServiceResponseFile);
|
||||||
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
|
|
||||||
|
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||||
|
.dossierId(TEST_DOSSIER_ID)
|
||||||
|
.fileId(TEST_FILE_ID)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||||
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
|
}
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
|
System.out.println("duration: " + (end - start));
|
||||||
|
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUnicodeProblem() throws IOException {
|
public void testUnicodeProblem() throws IOException {
|
||||||
|
|
||||||
@ -1005,7 +1078,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("testManualRedaction");
|
System.out.println("testManualRedaction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||||
|
|
||||||
ManualRedactions manualRedactions = new ManualRedactions();
|
ManualRedactions manualRedactions = new ManualRedactions();
|
||||||
|
|
||||||
@ -1053,7 +1126,7 @@ public class RedactionIntegrationTest {
|
|||||||
.page(1)
|
.page(1)
|
||||||
.build()));
|
.build()));
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||||
request.setManualRedactions(manualRedactions);
|
request.setManualRedactions(manualRedactions);
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -1094,9 +1167,8 @@ public class RedactionIntegrationTest {
|
|||||||
public void classificationTest() throws IOException {
|
public void classificationTest() throws IOException {
|
||||||
|
|
||||||
System.out.println("classificationTest");
|
System.out.println("classificationTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
.dossierId(request.getDossierId())
|
.dossierId(request.getDossierId())
|
||||||
@ -1111,14 +1183,34 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void classificationTestWithCvTableService() throws IOException {
|
||||||
|
|
||||||
|
System.out.println("classificationTest");
|
||||||
|
|
||||||
|
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||||
|
AnalyzeRequest request = prepareStorage("files/new/VV-511309_OCR.pdf", tableServiceResponseFile);
|
||||||
|
|
||||||
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
|
.dossierId(request.getDossierId())
|
||||||
|
.fileId(request.getFileId())
|
||||||
|
.dossierTemplateId(request.getDossierTemplateId())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
RedactionResult result = redactionController.classify(redactionRequest);
|
||||||
|
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Classified.pdf")) {
|
||||||
|
fileOutputStream.write(result.getDocument());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void sectionsTest() throws IOException {
|
public void sectionsTest() throws IOException {
|
||||||
|
|
||||||
System.out.println("sectionsTest");
|
System.out.println("sectionsTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
.dossierId(request.getDossierId())
|
.dossierId(request.getDossierId())
|
||||||
@ -1138,9 +1230,7 @@ public class RedactionIntegrationTest {
|
|||||||
public void htmlTablesTest() throws IOException {
|
public void htmlTablesTest() throws IOException {
|
||||||
|
|
||||||
System.out.println("htmlTablesTest");
|
System.out.println("htmlTablesTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
.dossierId(request.getDossierId())
|
.dossierId(request.getDossierId())
|
||||||
@ -1160,9 +1250,8 @@ public class RedactionIntegrationTest {
|
|||||||
public void htmlTableRotationTest() throws IOException {
|
public void htmlTableRotationTest() throws IOException {
|
||||||
|
|
||||||
System.out.println("htmlTableRotationTest");
|
System.out.println("htmlTableRotationTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
.dossierId(request.getDossierId())
|
.dossierId(request.getDossierId())
|
||||||
@ -1181,9 +1270,7 @@ public class RedactionIntegrationTest {
|
|||||||
@Test
|
@Test
|
||||||
public void phantomCellsDocumentTest() throws IOException {
|
public void phantomCellsDocumentTest() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -1202,9 +1289,8 @@ public class RedactionIntegrationTest {
|
|||||||
public void sponsorCompanyTest() throws IOException {
|
public void sponsorCompanyTest() throws IOException {
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf");
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -1228,7 +1314,7 @@ public class RedactionIntegrationTest {
|
|||||||
@Ignore
|
@Ignore
|
||||||
public void resizeRedactionTest() throws IOException {
|
public void resizeRedactionTest() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||||
|
|
||||||
ManualRedactions manualRedactions = new ManualRedactions();
|
ManualRedactions manualRedactions = new ManualRedactions();
|
||||||
|
|
||||||
@ -1284,7 +1370,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||||
request.setManualRedactions(manualRedactions);
|
request.setManualRedactions(manualRedactions);
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -1384,7 +1470,7 @@ public class RedactionIntegrationTest {
|
|||||||
@Ignore
|
@Ignore
|
||||||
public void testManualSurroundingText() throws IOException {
|
public void testManualSurroundingText() throws IOException {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/S4.pdf");
|
String pdfFile = "files/new/S4.pdf";
|
||||||
|
|
||||||
ManualRedactions manualRedactions = new ManualRedactions();
|
ManualRedactions manualRedactions = new ManualRedactions();
|
||||||
|
|
||||||
@ -1437,7 +1523,7 @@ public class RedactionIntegrationTest {
|
|||||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
|
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
|
||||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
|
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||||
request.setManualRedactions(manualRedactions);
|
request.setManualRedactions(manualRedactions);
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
AnalyzeResult result = analyzeService.analyze(request);
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
@ -1791,10 +1877,9 @@ public class RedactionIntegrationTest {
|
|||||||
public void testImportedRedactions() throws IOException {
|
public void testImportedRedactions() throws IOException {
|
||||||
|
|
||||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
|
||||||
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
|
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
|
||||||
|
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
||||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream());
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream());
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
@ -1838,8 +1923,7 @@ public class RedactionIntegrationTest {
|
|||||||
String fileName = "files/mr-mrs.pdf";
|
String fileName = "files/mr-mrs.pdf";
|
||||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
AnalyzeRequest request = prepareStorage(fileName);
|
||||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
|
||||||
|
|
||||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
analyzeService.analyze(request);
|
analyzeService.analyze(request);
|
||||||
@ -1866,7 +1950,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private AnalyzeRequest prepareStorage(InputStream stream) {
|
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||||
@ -1875,7 +1959,8 @@ public class RedactionIntegrationTest {
|
|||||||
.lastProcessed(OffsetDateTime.now())
|
.lastProcessed(OffsetDateTime.now())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream);
|
||||||
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
|
||||||
|
|
||||||
return request;
|
return request;
|
||||||
|
|
||||||
@ -1885,9 +1970,16 @@ public class RedactionIntegrationTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
private AnalyzeRequest prepareStorage(String file) {
|
private AnalyzeRequest prepareStorage(String file) {
|
||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
return prepareStorage(file, "files/cv_service_empty_response.json");
|
||||||
|
}
|
||||||
|
|
||||||
return prepareStorage(pdfFileResource.getInputStream());
|
@SneakyThrows
|
||||||
|
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||||
|
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
|
||||||
|
|
||||||
|
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -555,6 +555,7 @@ public class RulesTest {
|
|||||||
.lastProcessed(OffsetDateTime.now())
|
.lastProcessed(OffsetDateTime.now())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
||||||
|
|
||||||
return request;
|
return request;
|
||||||
|
|||||||
@ -20,14 +20,18 @@ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
|||||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||||
import org.springframework.boot.test.context.SpringBootTest;
|
import org.springframework.boot.test.context.SpringBootTest;
|
||||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
|
import org.springframework.context.annotation.Primary;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit4.SpringRunner;
|
import org.springframework.test.context.junit4.SpringRunner;
|
||||||
|
|
||||||
import com.amazonaws.services.s3.AmazonS3;
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||||
import com.iqser.red.service.redaction.v1.server.Application;
|
import com.iqser.red.service.redaction.v1.server.Application;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||||
@ -35,10 +39,13 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
|||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||||
|
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||||
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
@ -71,13 +78,24 @@ public class PdfSegmentationServiceTest {
|
|||||||
@MockBean
|
@MockBean
|
||||||
private LegalBasisClient legalBasisClient;
|
private LegalBasisClient legalBasisClient;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private StorageService storageService;
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private ObjectMapper objectMapper;
|
private ObjectMapper objectMapper;
|
||||||
|
|
||||||
@Configuration
|
private final static String TEST_DOSSIER_ID = "123";
|
||||||
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
|
private final static String TEST_FILE_ID = "123";
|
||||||
public static class TestConfiguration {
|
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
|
||||||
|
public static class TestConfiguration {
|
||||||
|
@Bean
|
||||||
|
@Primary
|
||||||
|
public StorageService inmemoryStorage() {
|
||||||
|
|
||||||
|
return new FileSystemBackedStorageService();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -85,6 +103,7 @@ public class PdfSegmentationServiceTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testMapping() {
|
public void testMapping() {
|
||||||
|
|
||||||
|
prepareStorage();
|
||||||
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
|
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
|
||||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
|
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
|
||||||
|
|
||||||
@ -103,9 +122,10 @@ public class PdfSegmentationServiceTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testPDFSegmentationWithComplexTable() throws IOException {
|
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||||
|
|
||||||
|
prepareStorage();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||||
|
|
||||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||||
assertThat(document.getParagraphs()
|
assertThat(document.getParagraphs()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
@ -124,9 +144,10 @@ public class PdfSegmentationServiceTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testTableExtraction() throws IOException {
|
public void testTableExtraction() throws IOException {
|
||||||
|
|
||||||
|
prepareStorage();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||||
|
|
||||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||||
assertThat(document.getParagraphs()
|
assertThat(document.getParagraphs()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
@ -162,9 +183,10 @@ public class PdfSegmentationServiceTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testMultiPageMetadataPropagation() throws IOException {
|
public void testMultiPageMetadataPropagation() throws IOException {
|
||||||
|
|
||||||
|
prepareStorage();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
||||||
|
|
||||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||||
assertThat(document.getParagraphs()
|
assertThat(document.getParagraphs()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
@ -200,9 +222,10 @@ public class PdfSegmentationServiceTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testHeaderCellsForRotatedTable() throws IOException {
|
public void testHeaderCellsForRotatedTable() throws IOException {
|
||||||
|
|
||||||
|
prepareStorage();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
||||||
|
|
||||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||||
assertThat(document.getParagraphs()
|
assertThat(document.getParagraphs()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
@ -235,4 +258,10 @@ public class PdfSegmentationServiceTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
private void prepareStorage() {
|
||||||
|
|
||||||
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"dossierId": "123",
|
||||||
|
"fileId": "123",
|
||||||
|
"operation": "table",
|
||||||
|
"targetFileExtension": "ORIGIN.pdf.gz",
|
||||||
|
"responseFileExtension": "TABLES.json.gz",
|
||||||
|
"data": []
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user