Pull request #483: RED-5028: Integrated cv table service
Merge in RED/redaction-service from RED-5028 to master * commit 'f6bc49d42c65a8580a5558891cabd4738af01d87': RED-5028: Integrated cv table service
This commit is contained in:
commit
9d88925ff1
@ -12,7 +12,7 @@
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
|
||||
<properties>
|
||||
<persistence-service.version>1.254.0</persistence-service.version>
|
||||
<persistence-service.version>1.299.0</persistence-service.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@ -2,6 +2,13 @@ package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import static java.util.stream.Collectors.toSet;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
@ -15,13 +22,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class BlockificationService {
|
||||
@ -48,10 +48,12 @@ public class BlockificationService {
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
||||
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
||||
boolean splittedByRuling = word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || word
|
||||
.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) || word
|
||||
.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || word
|
||||
.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
||||
boolean splittedByRuling =
|
||||
isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) ||
|
||||
isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||
|
||||
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||
|| isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
||||
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
@ -74,7 +74,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
@ -101,7 +101,7 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
 * Holder for the cells that make up one table.
 *
 * Accessors and the constructor are generated by Lombok from the annotations below.
 */
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class PdfTable {
|
||||
|
||||
// Cells of this table; initialised to an empty, mutable list and annotated as non-null.
@NonNull
|
||||
private List<PdfTableCell> tableCells = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
 * Geometry of a single table cell: two x/y coordinate pairs plus a width and a height.
 *
 * Instances can be created through the Lombok builder or the generated constructors.
 * NOTE(review): coordinate origin and units are not defined in this class; confirm against the
 * producer of the table data.
 */
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@RequiredArgsConstructor
|
||||
public class PdfTableCell {
|
||||
|
||||
// Field order matters: it defines the parameter order of the all-args constructor.
private float x0;
|
||||
private float y0;
|
||||
private float x1;
|
||||
private float y1;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -0,0 +1,16 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * JSON-mapped description of one page: its number, rotation and dimensions.
 *
 * NOTE(review): whether page numbers are 0- or 1-based and which unit the rotation and size use
 * is not visible here; confirm against the service that produces this payload.
 */
@Data
|
||||
@CompiledJson
|
||||
public class PageInfo {
|
||||
|
||||
// Page number; used by TableService as the key when grouping table cells per page.
private int number;
|
||||
private int rotation;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * JSON-mapped geometry of a single table cell as delivered in the table service payload.
 *
 * Carries the same six float values as PdfTableCell, into which TableService copies them.
 */
@Data
|
||||
@CompiledJson
|
||||
public class TableCells {
|
||||
|
||||
private float x0;
|
||||
private float y0;
|
||||
private float x1;
|
||||
private float y1;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * JSON-mapped table data for one page: the page description plus the cells found on it.
 */
@Data
|
||||
@CompiledJson
|
||||
public class TableData {
|
||||
|
||||
// Page the cells belong to; no default value, so it is null until set or deserialized.
private PageInfo pageInfo;
|
||||
// Cells on that page; defaults to an empty, mutable list.
private List<TableCells> tableCells = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * JSON-mapped top-level payload of a table service result: identifying metadata and the
 * table data entries it contains.
 */
@Data
|
||||
@CompiledJson
|
||||
public class TableServiceResponse {
|
||||
// Identifiers of the dossier and file this result belongs to.
private String dossierId;
|
||||
private String fileId;
|
||||
// NOTE(review): meaning of operation and the two file-extension fields is defined by the
// producing service and is not visible here.
private String operation;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
|
||||
// Table data entries; defaults to an empty, mutable list.
private List<TableData> data = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -96,7 +96,8 @@ public class AnalyzeService {
|
||||
if (redactionServiceSettings.isEnableImageClassification()) {
|
||||
pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, pdfImages);
|
||||
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages);
|
||||
pageCount = classifiedDoc.getPages().size();
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
|
||||
@ -9,6 +9,7 @@ import java.nio.file.attribute.FileAttribute;
|
||||
import java.nio.file.attribute.PosixFilePermission;
|
||||
import java.nio.file.attribute.PosixFilePermissions;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -29,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.service.Classifi
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
@ -42,15 +45,17 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class PdfSegmentationService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
private final RulingCleaningService rulingCleaningService;
|
||||
private final TableExtractionService tableExtractionService;
|
||||
private final BlockificationService blockificationService;
|
||||
private final ClassificationService classificationService;
|
||||
private final SectionsBuilderService sectionsBuilderService;
|
||||
private final ImageService imageService;
|
||||
private final TableService tableService;
|
||||
|
||||
|
||||
public Document parseDocument(InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
|
||||
public Document parseDocument(String dossierId, String fileId, InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
|
||||
|
||||
PDDocument pdDocument = null;
|
||||
try {
|
||||
@ -67,6 +72,11 @@ public class PdfSegmentationService {
|
||||
tempFile.setExecutable(true, true);
|
||||
}
|
||||
|
||||
Map<Integer, List<PdfTableCell>> pdfTableCells = new HashMap<>();
|
||||
if (redactionServiceSettings.isCvServiceEnabled()) {
|
||||
pdfTableCells = tableService.convertTables(dossierId, fileId);
|
||||
}
|
||||
|
||||
try (var fos = new FileOutputStream(tempFile)) {
|
||||
IOUtils.copy(documentInputStream, fos);
|
||||
|
||||
@ -94,12 +104,8 @@ public class PdfSegmentationService {
|
||||
int rotation = pdPage.getRotation();
|
||||
boolean isRotated = rotation != 0 && rotation != 360;
|
||||
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper
|
||||
.getMaxCharHeight());
|
||||
|
||||
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
|
||||
.getVertical());
|
||||
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings(), stripper.getMinCharWidth(), stripper.getMaxCharHeight());
|
||||
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
PDRectangle cropbox = pdPage.getCropBox();
|
||||
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
|
||||
@ -109,7 +115,6 @@ public class PdfSegmentationService {
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
buildPageStatistics(page);
|
||||
increaseDocumentStatistics(page, document);
|
||||
@ -128,7 +133,6 @@ public class PdfSegmentationService {
|
||||
sectionsBuilderService.buildSections(document);
|
||||
sectionsBuilderService.addImagesToSections(document);
|
||||
|
||||
|
||||
IOUtils.close(pdDocument);
|
||||
|
||||
if (!tempFile.delete()) {
|
||||
|
||||
@ -0,0 +1,54 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableCells;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class TableService {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public Map<Integer, List<PdfTableCell>> convertTables(String dossierId, String fileId) {
|
||||
|
||||
var tableClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES));
|
||||
|
||||
TableServiceResponse tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
|
||||
|
||||
Map<Integer, List<PdfTableCell>> tableCells = new HashMap<>();
|
||||
tableServiceResponse.getData().forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>()).addAll(convertTableCells(tableData.getTableCells())));
|
||||
|
||||
return tableCells;
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends PdfTableCell> convertTableCells(List<TableCells> tableCells) {
|
||||
|
||||
List<PdfTableCell> pdfTableCells = new ArrayList<>();
|
||||
|
||||
tableCells.forEach(t -> pdfTableCells.add(PdfTableCell.builder().y0(t.getY0()).x1(t.getX1()).y1(t.getY1()).x0(t.getX0()).width(t.getWidth()).height(t.getHeight()).build()));
|
||||
|
||||
return pdfTableCells;
|
||||
}
|
||||
|
||||
}
|
||||
@ -14,6 +14,8 @@ public class RedactionServiceSettings {
|
||||
|
||||
private boolean enableImageClassification = true;
|
||||
|
||||
private boolean cvServiceEnabled = true;
|
||||
|
||||
private float maxImageCropboxRatio = 0.9f;
|
||||
|
||||
private int analysisVersion = 1;
|
||||
|
||||
@ -1,13 +1,22 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@Slf4j
|
||||
public class Table extends AbstractTextContainer {
|
||||
@ -160,7 +169,7 @@ public class Table extends AbstractTextContainer {
|
||||
for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < unrotatedRowCount; j++) { // cols
|
||||
Cell cell = cells.get(new CellPosition(i, j));
|
||||
Cell cell = cells.get(new CellPosition(j, i));
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
|
||||
@ -1,18 +1,34 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class RulingCleaningService {
|
||||
|
||||
public CleanRulings getCleanRulings(List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
public CleanRulings getCleanRulings(List<PdfTableCell> pdfTableCells, List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
|
||||
|
||||
if (!rulings.isEmpty()) {
|
||||
snapPoints(rulings, minCharWidth, maxCharHeight);
|
||||
@ -24,6 +40,9 @@ public class RulingCleaningService {
|
||||
vrs.add(vr);
|
||||
}
|
||||
}
|
||||
if (vrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
|
||||
vrs.addAll(extractVerticalRulings(pdfTableCells));
|
||||
}
|
||||
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
@ -32,6 +51,9 @@ public class RulingCleaningService {
|
||||
hrs.add(hr);
|
||||
}
|
||||
}
|
||||
if (hrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
|
||||
hrs.addAll(extractHorizontalRulings(pdfTableCells));
|
||||
}
|
||||
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
||||
|
||||
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
|
||||
@ -113,6 +135,60 @@ public class RulingCleaningService {
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends Ruling> extractVerticalRulings(List<PdfTableCell> pdfTableCells) {
|
||||
|
||||
List<Ruling> vrs = new ArrayList<>();
|
||||
|
||||
if (pdfTableCells != null) {
|
||||
for (PdfTableCell pdfTableCell : pdfTableCells) {
|
||||
Ruling leftLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX0(), pdfTableCell.getY0(), pdfTableCell.getY1());
|
||||
Ruling rightLine = createRuling(pdfTableCell.getX1(), pdfTableCell.getX1(), pdfTableCell.getY0(), pdfTableCell.getY1());
|
||||
vrs.add(leftLine);
|
||||
vrs.add(rightLine);
|
||||
}
|
||||
}
|
||||
return vrs;
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends Ruling> extractHorizontalRulings(List<PdfTableCell> pdfTableCells) {
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
|
||||
if (pdfTableCells != null) {
|
||||
for (PdfTableCell pdfTableCell : pdfTableCells) {
|
||||
Ruling topLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX1(), pdfTableCell.getY1(), pdfTableCell.getY1());
|
||||
Ruling baseLine = createRuling(pdfTableCell.getX0(), pdfTableCell.getX1(), pdfTableCell.getY0(), pdfTableCell.getY0());
|
||||
hrs.add(topLine);
|
||||
hrs.add(baseLine);
|
||||
}
|
||||
}
|
||||
|
||||
return hrs;
|
||||
}
|
||||
|
||||
|
||||
private Ruling createRuling(float tableCellX0, float tableCellX1, float tableCellY0, float tableCellY1) {
|
||||
|
||||
float x0 = tableCellX0;
|
||||
float x1 = tableCellX1;
|
||||
float y0 = tableCellY0;
|
||||
float y1 = tableCellY1;
|
||||
|
||||
if (x1 < x0) {
|
||||
x0 = tableCellX1;
|
||||
x1 = tableCellX0;
|
||||
}
|
||||
|
||||
if (y1 < y0) {
|
||||
y0 = tableCellY1;
|
||||
y1 = tableCellY0;
|
||||
}
|
||||
|
||||
return new Ruling(new Point2D.Float(x0, y0), new Point2D.Float(x1, y1));
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
|
||||
|
||||
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
|
||||
|
||||
@ -1,36 +1,29 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import lombok.SneakyThrows;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
@ -55,16 +48,49 @@ import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ -268,9 +294,8 @@ public class RedactionIntegrationTest {
|
||||
public void testMergedImages() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -313,8 +338,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
// F. Lastname, J. Doe, M. Mustermann
|
||||
// Lastname M., Doe J., Mustermann M.
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/ExpansionTest.pdf");
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -334,8 +358,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -367,8 +390,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("testIgnoreHint");
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/test-ignore-hint.pdf");
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
@ -426,7 +448,7 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
for (File path : input) {
|
||||
|
||||
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
|
||||
AnalyzeRequest request = prepareStorage(path.getPath());
|
||||
System.out.println("Redacting file : " + path.getName());
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
@ -471,8 +493,7 @@ public class RedactionIntegrationTest {
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
@ -582,8 +603,7 @@ public class RedactionIntegrationTest {
|
||||
String fileName = "files/new/test1S1T1.pdf";
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
@ -637,8 +657,7 @@ public class RedactionIntegrationTest {
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
@ -836,6 +855,60 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedactionWithCvTableService() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
String fileName = "files/new/VV-511309.pdf";
|
||||
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||
|
||||
AnalyzeRequest request = prepareStorage(fileName, tableServiceResponseFile);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedactionWithOcrAndCvTableService() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
String fileName = "files/new/VV-511309_OCR.pdf";
|
||||
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||
|
||||
AnalyzeRequest request = prepareStorage(fileName, tableServiceResponseFile);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUnicodeProblem() throws IOException {
|
||||
|
||||
@ -1005,7 +1078,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("testManualRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
@ -1053,7 +1126,7 @@ public class RedactionIntegrationTest {
|
||||
.page(1)
|
||||
.build()));
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1094,9 +1167,8 @@ public class RedactionIntegrationTest {
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
@ -1111,14 +1183,34 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void classificationTestWithCvTableService() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
|
||||
String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
|
||||
AnalyzeRequest request = prepareStorage("files/new/VV-511309_OCR.pdf", tableServiceResponseFile);
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
.fileId(request.getFileId())
|
||||
.dossierTemplateId(request.getDossierTemplateId())
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.classify(redactionRequest);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Classified.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void sectionsTest() throws IOException {
|
||||
|
||||
System.out.println("sectionsTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
@ -1138,9 +1230,7 @@ public class RedactionIntegrationTest {
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTablesTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
@ -1160,9 +1250,8 @@ public class RedactionIntegrationTest {
|
||||
public void htmlTableRotationTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTableRotationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.dossierId(request.getDossierId())
|
||||
@ -1181,9 +1270,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void phantomCellsDocumentTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1202,9 +1289,8 @@ public class RedactionIntegrationTest {
|
||||
public void sponsorCompanyTest() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1228,7 +1314,7 @@ public class RedactionIntegrationTest {
|
||||
@Ignore
|
||||
public void resizeRedactionTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
String pdfFile = "files/Minimal Examples/Single Table.pdf";
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
@ -1284,7 +1370,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1384,7 +1470,7 @@ public class RedactionIntegrationTest {
|
||||
@Ignore
|
||||
public void testManualSurroundingText() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/S4.pdf");
|
||||
String pdfFile = "files/new/S4.pdf";
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
@ -1437,7 +1523,7 @@ public class RedactionIntegrationTest {
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(pdfFile);
|
||||
request.setManualRedactions(manualRedactions);
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
@ -1791,10 +1877,9 @@ public class RedactionIntegrationTest {
|
||||
public void testImportedRedactions() throws IOException {
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
||||
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream());
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
@ -1838,8 +1923,7 @@ public class RedactionIntegrationTest {
|
||||
String fileName = "files/mr-mrs.pdf";
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeRequest request = prepareStorage(fileName);
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
analyzeService.analyze(request);
|
||||
@ -1866,7 +1950,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(InputStream stream) {
|
||||
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
@ -1875,7 +1959,8 @@ public class RedactionIntegrationTest {
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream);
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
|
||||
|
||||
return request;
|
||||
|
||||
@ -1885,9 +1970,16 @@ public class RedactionIntegrationTest {
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(String file) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
return prepareStorage(file, "files/cv_service_empty_response.json");
|
||||
}
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream());
|
||||
@SneakyThrows
|
||||
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -555,6 +555,7 @@ public class RulesTest {
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
|
||||
|
||||
return request;
|
||||
|
||||
@ -20,14 +20,18 @@ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
@ -35,10 +39,13 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ -71,13 +78,24 @@ public class PdfSegmentationServiceTest {
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
@Autowired
|
||||
private StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
private final static String TEST_DOSSIER_ID = "123";
|
||||
private final static String TEST_FILE_ID = "123";
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -85,6 +103,7 @@ public class PdfSegmentationServiceTest {
|
||||
@SneakyThrows
|
||||
public void testMapping() {
|
||||
|
||||
prepareStorage();
|
||||
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
|
||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
|
||||
|
||||
@ -103,9 +122,10 @@ public class PdfSegmentationServiceTest {
|
||||
@Test
|
||||
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||
|
||||
prepareStorage();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -124,9 +144,10 @@ public class PdfSegmentationServiceTest {
|
||||
@Test
|
||||
public void testTableExtraction() throws IOException {
|
||||
|
||||
prepareStorage();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -162,9 +183,10 @@ public class PdfSegmentationServiceTest {
|
||||
@Test
|
||||
public void testMultiPageMetadataPropagation() throws IOException {
|
||||
|
||||
prepareStorage();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -200,9 +222,10 @@ public class PdfSegmentationServiceTest {
|
||||
@Test
|
||||
public void testHeaderCellsForRotatedTable() throws IOException {
|
||||
|
||||
prepareStorage();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
|
||||
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
@ -235,4 +258,10 @@ public class PdfSegmentationServiceTest {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void prepareStorage() {
|
||||
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -0,0 +1,8 @@
|
||||
{
|
||||
"dossierId": "123",
|
||||
"fileId": "123",
|
||||
"operation": "table",
|
||||
"targetFileExtension": "ORIGIN.pdf.gz",
|
||||
"responseFileExtension": "TABLES.json.gz",
|
||||
"data": []
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user