Pull request #483: RED-5028: Integrated cv table service

Merge in RED/redaction-service from RED-5028 to master

* commit 'f6bc49d42c65a8580a5558891cabd4738af01d87':
  RED-5028: Integrated cv table service
This commit is contained in:
Philipp Schramm 2022-10-11 09:03:07 +02:00
commit 9d88925ff1
23 changed files with 72323 additions and 126 deletions

View File

@ -12,7 +12,7 @@
<artifactId>redaction-service-api-v1</artifactId>
<properties>
<persistence-service.version>1.254.0</persistence-service.version>
<persistence-service.version>1.299.0</persistence-service.version>
</properties>
<dependencies>

View File

@ -2,6 +2,13 @@ package com.iqser.red.service.redaction.v1.server.classification.service;
import static java.util.stream.Collectors.toSet;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
@ -15,13 +22,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
@Service
@SuppressWarnings("all")
public class BlockificationService {
@ -48,10 +48,12 @@ public class BlockificationService {
boolean startFromTop = word.getY1() > maxY + word.getHeight();
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
boolean splittedByRuling = word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || word
.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) || word
.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || word
.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
boolean splittedByRuling =
isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) ||
isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|| isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {

View File

@ -48,7 +48,7 @@ public class RedactionController implements RedactionResource {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
@ -74,7 +74,7 @@ public class RedactionController implements RedactionResource {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
@ -101,7 +101,7 @@ public class RedactionController implements RedactionResource {
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, null);
classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
} catch (Exception e) {
throw new RedactionException(e);
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * Holder for the cells of a single table.
 *
 * <p>Lombok's {@code @Data} generates the getter, setter, {@code equals},
 * {@code hashCode} and {@code toString}. Because the only field is initialised
 * at its declaration, {@code @RequiredArgsConstructor} produces a constructor
 * without parameters.
 *
 * <p>NOTE(review): this type is not referenced in the visible part of this
 * change; the segmentation code works with {@code List<PdfTableCell>} directly.
 */
@Data
@RequiredArgsConstructor
public class PdfTable {
// Starts out as an empty, mutable list; @NonNull makes the generated setter reject null.
@NonNull
private List<PdfTableCell> tableCells = new ArrayList<>();
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
/**
 * Bounding box of one table cell as used inside the redaction service.
 *
 * <p>Instances are built from the table service's {@code TableCells} entries
 * (see {@code TableService}) and later turned into rulings by
 * {@code RulingCleaningService}. That service swaps the coordinates when
 * {@code x1 < x0} or {@code y1 < y0}, so consumers must not rely on
 * {@code (x0, y0)} being the smaller corner.
 *
 * <p>Lombok generates accessors, a builder, an all-args constructor and, as
 * there are no final or {@code @NonNull} fields, a no-args constructor.
 */
@Data
@Builder
@AllArgsConstructor
@RequiredArgsConstructor
public class PdfTableCell {
// First corner of the cell — NOTE(review): coordinate system and units are not visible here; confirm.
private float x0;
private float y0;
// Second corner of the cell.
private float x1;
private float y1;
// Extent of the cell as delivered by the table service; not recomputed from the corners.
private float width;
private float height;
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Page metadata attached to each table entry of the table service response.
 *
 * <p>{@code TableService} uses {@link #number} as the map key under which the
 * cells of a page are collected.
 */
@Data
@CompiledJson
public class PageInfo {
// Page number; used as lookup key for the page's table cells.
// NOTE(review): must use the same numbering base as the segmentation's pageNumber — confirm.
private int number;
// Page rotation as reported by the table service — presumably degrees; verify against the service contract.
private int rotation;
// Page dimensions as reported by the table service — units not visible here.
private float width;
private float height;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * One table cell as it appears in the table service response.
 *
 * <p>Despite the plural name, an instance describes a single cell.
 * {@code TableService} copies every field one-to-one into a
 * {@code PdfTableCell}.
 */
@Data
@CompiledJson
public class TableCells {
// First corner of the cell — NOTE(review): coordinate system and units are not visible here; confirm.
private float x0;
private float y0;
// Second corner of the cell.
private float x1;
private float y1;
// Extent of the cell as delivered by the table service.
private float width;
private float height;
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
import java.util.ArrayList;
import java.util.List;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * One entry of the table service response: the page it refers to plus the
 * table cells found on that page.
 */
@Data
@CompiledJson
public class TableData {
// Page the cells belong to; TableService reads pageInfo.number as the grouping key.
private PageInfo pageInfo;
// Cells for this entry; defaults to an empty, mutable list when nothing is set.
private List<TableCells> tableCells = new ArrayList<>();
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.table;
import java.util.ArrayList;
import java.util.List;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Root object of the table service result.
 *
 * <p>{@code TableService} reads it with a Jackson {@code ObjectMapper} from the
 * object stored under {@code FileType.TABLES} for a dossier/file pair.
 */
@Data
@CompiledJson
public class TableServiceResponse {
// Identifiers of the dossier and file the result belongs to.
private String dossierId;
private String fileId;
// Metadata fields of the response; not evaluated in the visible part of this change.
private String operation;
private String targetFileExtension;
private String responseFileExtension;
// One entry per page/table; defaults to an empty, mutable list when nothing is set.
private List<TableData> data = new ArrayList<>();
}

View File

@ -96,7 +96,8 @@ public class AnalyzeService {
if (redactionServiceSettings.isEnableImageClassification()) {
pdfImages = imageService.convertImages(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, pdfImages);
classifiedDoc = pdfSegmentationService.parseDocument(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), storedObjectStream, pdfImages);
pageCount = classifiedDoc.getPages().size();
} catch (Exception e) {
throw new RedactionException(e);

View File

@ -9,6 +9,7 @@ import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -29,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.service.Classifi
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
@ -42,15 +45,17 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class PdfSegmentationService {
private final RedactionServiceSettings redactionServiceSettings;
private final RulingCleaningService rulingCleaningService;
private final TableExtractionService tableExtractionService;
private final BlockificationService blockificationService;
private final ClassificationService classificationService;
private final SectionsBuilderService sectionsBuilderService;
private final ImageService imageService;
private final TableService tableService;
public Document parseDocument(InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
public Document parseDocument(String dossierId, String fileId, InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
PDDocument pdDocument = null;
try {
@ -67,6 +72,11 @@ public class PdfSegmentationService {
tempFile.setExecutable(true, true);
}
Map<Integer, List<PdfTableCell>> pdfTableCells = new HashMap<>();
if (redactionServiceSettings.isCvServiceEnabled()) {
pdfTableCells = tableService.convertTables(dossierId, fileId);
}
try (var fos = new FileOutputStream(tempFile)) {
IOUtils.copy(documentInputStream, fos);
@ -94,12 +104,8 @@ public class PdfSegmentationService {
int rotation = pdPage.getRotation();
boolean isRotated = rotation != 0 && rotation != 360;
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper
.getMaxCharHeight());
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
.getVertical());
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber), stripper.getRulings(), stripper.getMinCharWidth(), stripper.getMaxCharHeight());
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
PDRectangle cropbox = pdPage.getCropBox();
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
@ -109,7 +115,6 @@ public class PdfSegmentationService {
page.setLandscape(isLandscape || isRotated);
page.setPageNumber(pageNumber);
tableExtractionService.extractTables(cleanRulings, page);
buildPageStatistics(page);
increaseDocumentStatistics(page, document);
@ -128,7 +133,6 @@ public class PdfSegmentationService {
sectionsBuilderService.buildSections(document);
sectionsBuilderService.addImagesToSections(document);
IOUtils.close(pdDocument);
if (!tempFile.delete()) {

View File

@ -0,0 +1,54 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableCells;
import com.iqser.red.service.redaction.v1.server.redaction.model.table.TableServiceResponse;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * Reads the stored table service result of a file and converts it into the
 * per-page cell lists consumed by the segmentation.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class TableService {

    private final ObjectMapper objectMapper;
    private final RedactionStorageService redactionStorageService;


    /**
     * Loads the object stored under {@code FileType.TABLES} for the given file
     * and groups its table cells by page number.
     *
     * <p>Entries without page information are skipped with a warning instead of
     * aborting the whole conversion. Any I/O or parsing failure is rethrown
     * unchanged via {@code @SneakyThrows}.
     *
     * @param dossierId id of the dossier the file belongs to
     * @param fileId    id of the file
     * @return mutable map from page number to the cells found on that page;
     *         empty when the response carries no table data, never {@code null}
     */
    @SneakyThrows
    public Map<Integer, List<PdfTableCell>> convertTables(String dossierId, String fileId) {

        TableServiceResponse tableServiceResponse;
        // Close the storage stream deterministically, regardless of the ObjectMapper's auto-close configuration.
        try (var tableClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES))) {
            tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
        }

        Map<Integer, List<PdfTableCell>> cellsPerPage = new HashMap<>();
        // A JSON "null" document or an explicit "data": null must not fail the analysis.
        if (tableServiceResponse == null || tableServiceResponse.getData() == null) {
            return cellsPerPage;
        }

        for (var tableData : tableServiceResponse.getData()) {
            if (tableData == null || tableData.getPageInfo() == null) {
                log.warn("Skipping table entry without page info for dossier {} and file {}", dossierId, fileId);
                continue;
            }
            cellsPerPage.computeIfAbsent(tableData.getPageInfo().getNumber(), pageNumber -> new ArrayList<>())
                    .addAll(convertTableCells(tableData.getTableCells()));
        }

        return cellsPerPage;
    }


    /**
     * Copies the response cells one-to-one into {@link PdfTableCell} instances.
     *
     * @param tableCells cells of one response entry, may be {@code null}
     * @return new list with one converted cell per non-null input cell
     */
    private List<PdfTableCell> convertTableCells(List<TableCells> tableCells) {

        List<PdfTableCell> pdfTableCells = new ArrayList<>();
        if (tableCells == null) {
            return pdfTableCells;
        }

        for (TableCells cell : tableCells) {
            if (cell == null) {
                continue;
            }
            pdfTableCells.add(PdfTableCell.builder()
                    .x0(cell.getX0())
                    .y0(cell.getY0())
                    .x1(cell.getX1())
                    .y1(cell.getY1())
                    .width(cell.getWidth())
                    .height(cell.getHeight())
                    .build());
        }
        return pdfTableCells;
    }

}

View File

@ -14,6 +14,8 @@ public class RedactionServiceSettings {
private boolean enableImageClassification = true;
private boolean cvServiceEnabled = true;
private float maxImageCropboxRatio = 0.9f;
private int analysisVersion = 1;

View File

@ -1,13 +1,22 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.collections4.CollectionUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import java.util.*;
@Slf4j
public class Table extends AbstractTextContainer {
@ -160,7 +169,7 @@ public class Table extends AbstractTextContainer {
for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
List<Cell> lastRow = new ArrayList<>();
for (int j = 0; j < unrotatedRowCount; j++) { // cols
Cell cell = cells.get(new CellPosition(i, j));
Cell cell = cells.get(new CellPosition(j, i));
if (cell != null) {
lastRow.add(cell);
}

View File

@ -1,18 +1,34 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import org.springframework.stereotype.Service;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfTableCell;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class RulingCleaningService {
public CleanRulings getCleanRulings(List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
private final RedactionServiceSettings redactionServiceSettings;
public CleanRulings getCleanRulings(List<PdfTableCell> pdfTableCells, List<Ruling> rulings, float minCharWidth, float maxCharHeight) {
if (!rulings.isEmpty()) {
snapPoints(rulings, minCharWidth, maxCharHeight);
@ -24,6 +40,9 @@ public class RulingCleaningService {
vrs.add(vr);
}
}
if (vrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
vrs.addAll(extractVerticalRulings(pdfTableCells));
}
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
List<Ruling> hrs = new ArrayList<>();
@ -32,6 +51,9 @@ public class RulingCleaningService {
hrs.add(hr);
}
}
if (hrs.isEmpty() && redactionServiceSettings.isCvServiceEnabled()) {
hrs.addAll(extractHorizontalRulings(pdfTableCells));
}
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
return CleanRulings.builder().vertical(verticalRulingLines).horizontal(horizontalRulingLines).build();
@ -113,6 +135,60 @@ public class RulingCleaningService {
}
/**
 * Derives vertical rulings from table cells: for every cell one line at
 * {@code x0} and one at {@code x1}, each spanning the cell from {@code y0}
 * to {@code y1}.
 *
 * @param pdfTableCells cells of the current page, may be {@code null}
 * @return new list with two rulings per cell (the {@code x0} edge first);
 *         empty when {@code pdfTableCells} is {@code null} or empty
 */
private Collection<? extends Ruling> extractVerticalRulings(List<PdfTableCell> pdfTableCells) {

    List<Ruling> verticalEdges = new ArrayList<>();
    if (pdfTableCells == null) {
        return verticalEdges;
    }

    for (PdfTableCell cell : pdfTableCells) {
        // Edge at x0, then edge at x1 — insertion order is kept as is.
        verticalEdges.add(createRuling(cell.getX0(), cell.getX0(), cell.getY0(), cell.getY1()));
        verticalEdges.add(createRuling(cell.getX1(), cell.getX1(), cell.getY0(), cell.getY1()));
    }
    return verticalEdges;
}
/**
 * Derives horizontal rulings from table cells: for every cell one line at
 * {@code y1} and one at {@code y0}, each spanning the cell from {@code x0}
 * to {@code x1}.
 *
 * @param pdfTableCells cells of the current page, may be {@code null}
 * @return new list with two rulings per cell (the {@code y1} edge first);
 *         empty when {@code pdfTableCells} is {@code null} or empty
 */
private Collection<? extends Ruling> extractHorizontalRulings(List<PdfTableCell> pdfTableCells) {

    List<Ruling> horizontalEdges = new ArrayList<>();
    if (pdfTableCells == null) {
        return horizontalEdges;
    }

    for (PdfTableCell cell : pdfTableCells) {
        // Edge at y1, then edge at y0 — insertion order is kept as is.
        horizontalEdges.add(createRuling(cell.getX0(), cell.getX1(), cell.getY1(), cell.getY1()));
        horizontalEdges.add(createRuling(cell.getX0(), cell.getX1(), cell.getY0(), cell.getY0()));
    }
    return horizontalEdges;
}
/**
 * Builds a ruling between the two given points, ordering each axis so that
 * the start point carries the smaller coordinate.
 *
 * @param tableCellX0 x coordinate of the first point
 * @param tableCellX1 x coordinate of the second point
 * @param tableCellY0 y coordinate of the first point
 * @param tableCellY1 y coordinate of the second point
 * @return ruling from {@code (min x, min y)} to {@code (max x, max y)}
 */
private Ruling createRuling(float tableCellX0, float tableCellX1, float tableCellY0, float tableCellY1) {

    // Strict comparisons on purpose: equal (or unordered) values keep their given order.
    boolean swapX = tableCellX1 < tableCellX0;
    boolean swapY = tableCellY1 < tableCellY0;

    float startX = swapX ? tableCellX1 : tableCellX0;
    float endX = swapX ? tableCellX0 : tableCellX1;
    float startY = swapY ? tableCellY1 : tableCellY0;
    float endY = swapY ? tableCellY0 : tableCellY1;

    Point2D.Float start = new Point2D.Float(startX, startY);
    Point2D.Float end = new Point2D.Float(endX, endY);
    return new Ruling(start, end);
}
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;

View File

@ -1,36 +1,29 @@
package com.iqser.red.service.redaction.v1.server;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.*;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
@ -55,16 +48,49 @@ import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.*;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotationService;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@ -268,9 +294,8 @@ public class RedactionIntegrationTest {
public void testMergedImages() throws IOException {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/merge_images.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -313,8 +338,7 @@ public class RedactionIntegrationTest {
// F. Lastname, J. Doe, M. Mustermann
// Lastname M., Doe J., Mustermann M.
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/ExpansionTest.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/ExpansionTest.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -334,8 +358,7 @@ public class RedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -367,8 +390,7 @@ public class RedactionIntegrationTest {
System.out.println("testIgnoreHint");
ClassPathResource pdfFileResource = new ClassPathResource("files/new/test-ignore-hint.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/new/test-ignore-hint.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
@ -426,7 +448,7 @@ public class RedactionIntegrationTest {
}
for (File path : input) {
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
AnalyzeRequest request = prepareStorage(path.getPath());
System.out.println("Redacting file : " + path.getName());
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -471,8 +493,7 @@ public class RedactionIntegrationTest {
String outputFileName = OsUtils.getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(fileName);
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder()
@ -582,8 +603,7 @@ public class RedactionIntegrationTest {
String fileName = "files/new/test1S1T1.pdf";
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(fileName);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
@ -637,8 +657,7 @@ public class RedactionIntegrationTest {
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.NER_ENTITIES), responseJson.getInputStream());
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(fileName);
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder()
@ -836,6 +855,60 @@ public class RedactionIntegrationTest {
}
/**
 * Analyzes and annotates {@code VV-511309.pdf} with a stored table service
 * response available in storage.
 */
@Test
public void testTableRedactionWithCvTableService() throws IOException {

    runCvTableRedaction("files/new/VV-511309.pdf", "files/cv_table_response_VV-511309.json");
}


/**
 * Same as {@link #testTableRedactionWithCvTableService()} but for the
 * {@code _OCR} variant of the document, using the same table service response.
 */
@Test
public void testTableRedactionWithOcrAndCvTableService() throws IOException {

    runCvTableRedaction("files/new/VV-511309_OCR.pdf", "files/cv_table_response_VV-511309.json");
}


/**
 * Shared body of the table service tests: stores the PDF and the table
 * service response, runs structure analysis, analysis and annotation, and
 * writes the annotated document to the temporary directory.
 *
 * @param fileName                 classpath location of the PDF under test
 * @param tableServiceResponseFile classpath location of the table service response JSON
 * @throws IOException if the annotated document cannot be written
 */
private void runCvTableRedaction(String fileName, String tableServiceResponseFile) throws IOException {

    long start = System.currentTimeMillis();

    AnalyzeRequest request = prepareStorage(fileName, tableServiceResponseFile);
    analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
    AnalyzeResult result = analyzeService.analyze(request);

    AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .build());

    // Fail with a readable message instead of an NPE inside the stream write.
    assertThat(annotateResponse.getDocument()).isNotNull();

    try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
        fileOutputStream.write(annotateResponse.getDocument());
    }

    long end = System.currentTimeMillis();
    System.out.println("duration: " + (end - start));
    System.out.println("numberOfPages: " + result.getNumberOfPages());
}
@Test
public void testUnicodeProblem() throws IOException {
@ -1005,7 +1078,7 @@ public class RedactionIntegrationTest {
System.out.println("testManualRedaction");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
String pdfFile = "files/Minimal Examples/Single Table.pdf";
ManualRedactions manualRedactions = new ManualRedactions();
@ -1053,7 +1126,7 @@ public class RedactionIntegrationTest {
.page(1)
.build()));
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1094,9 +1167,8 @@ public class RedactionIntegrationTest {
public void classificationTest() throws IOException {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.dossierId(request.getDossierId())
@ -1111,14 +1183,34 @@ public class RedactionIntegrationTest {
}
}
/**
 * Classifies {@code files/new/VV-511309_OCR.pdf} with the stored CV table service response
 * {@code files/cv_table_response_VV-511309.json} and writes the classified document to
 * {@code Classified.pdf} in the temporary directory.
 */
@Test
public void classificationTestWithCvTableService() throws IOException {

    // Print this test's own name; the copy-pasted "classificationTest" banner made the
    // console output indistinguishable from the sibling test of that name.
    System.out.println("classificationTestWithCvTableService");

    String tableServiceResponseFile = "files/cv_table_response_VV-511309.json";
    AnalyzeRequest request = prepareStorage("files/new/VV-511309_OCR.pdf", tableServiceResponseFile);

    RedactionRequest redactionRequest = RedactionRequest.builder()
            .dossierId(request.getDossierId())
            .fileId(request.getFileId())
            .dossierTemplateId(request.getDossierTemplateId())
            .build();

    RedactionResult result = redactionController.classify(redactionRequest);

    try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Classified.pdf")) {
        fileOutputStream.write(result.getDocument());
    }
}
@Test
public void sectionsTest() throws IOException {
System.out.println("sectionsTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.dossierId(request.getDossierId())
@ -1138,9 +1230,7 @@ public class RedactionIntegrationTest {
public void htmlTablesTest() throws IOException {
System.out.println("htmlTablesTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Single Table.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.dossierId(request.getDossierId())
@ -1160,9 +1250,8 @@ public class RedactionIntegrationTest {
public void htmlTableRotationTest() throws IOException {
System.out.println("htmlTableRotationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.dossierId(request.getDossierId())
@ -1181,9 +1270,7 @@ public class RedactionIntegrationTest {
@Test
public void phantomCellsDocumentTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/Phantom Cells.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1202,9 +1289,8 @@ public class RedactionIntegrationTest {
public void sponsorCompanyTest() throws IOException {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/Minimal Examples/sponsor_companies.pdf");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1228,7 +1314,7 @@ public class RedactionIntegrationTest {
@Ignore
public void resizeRedactionTest() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
String pdfFile = "files/Minimal Examples/Single Table.pdf";
ManualRedactions manualRedactions = new ManualRedactions();
@ -1284,7 +1370,7 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1384,7 +1470,7 @@ public class RedactionIntegrationTest {
@Ignore
public void testManualSurroundingText() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/new/S4.pdf");
String pdfFile = "files/new/S4.pdf";
ManualRedactions manualRedactions = new ManualRedactions();
@ -1437,7 +1523,7 @@ public class RedactionIntegrationTest {
manualRedactions.getEntriesToAdd().add(manualRedactionEntry2);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry3);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(pdfFile);
request.setManualRedactions(manualRedactions);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
@ -1791,10 +1877,9 @@ public class RedactionIntegrationTest {
public void testImportedRedactions() throws IOException {
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource pdfFileResource = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/RotateTestFile_without_highlights.IMPORTED_REDACTIONS.json");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage("files/ImportedRedactions/RotateTestFile_without_highlights.pdf");
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactions.getInputStream());
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
@ -1838,8 +1923,7 @@ public class RedactionIntegrationTest {
String fileName = "files/mr-mrs.pdf";
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeRequest request = prepareStorage(fileName);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
analyzeService.analyze(request);
@ -1866,7 +1950,7 @@ public class RedactionIntegrationTest {
@SneakyThrows
private AnalyzeRequest prepareStorage(InputStream stream) {
private AnalyzeRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
@ -1875,7 +1959,8 @@ public class RedactionIntegrationTest {
.lastProcessed(OffsetDateTime.now())
.build();
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), cvServiceResponseFileStream);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), fileStream);
return request;
@ -1885,9 +1970,16 @@ public class RedactionIntegrationTest {
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
return prepareStorage(file, "files/cv_service_empty_response.json");
}
return prepareStorage(pdfFileResource.getInputStream());
/**
 * Resolves both arguments as classpath resources and delegates to the stream-based
 * {@code prepareStorage} overload.
 *
 * @param file                  classpath location of the PDF to store
 * @param cvServiceResponseFile classpath location of the CV service response to store
 * @return the request produced by the stream-based overload
 */
@SneakyThrows
private AnalyzeRequest prepareStorage(String file, String cvServiceResponseFile) {

    final ClassPathResource document = new ClassPathResource(file);
    final ClassPathResource cvResponse = new ClassPathResource(cvServiceResponseFile);

    // Open the streams only after both resources exist, PDF first, as before.
    final InputStream documentStream = document.getInputStream();
    final InputStream cvResponseStream = cvResponse.getInputStream();

    return prepareStorage(documentStream, cvResponseStream);
}
}

View File

@ -555,6 +555,7 @@ public class RulesTest {
.lastProcessed(OffsetDateTime.now())
.build();
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES), new ClassPathResource("files/cv_service_empty_response.json").getInputStream());
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), stream);
return request;

View File

@ -20,14 +20,18 @@ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
@ -35,10 +39,13 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.model.image.ImageServiceResponse;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@ -71,13 +78,24 @@ public class PdfSegmentationServiceTest {
@MockBean
private LegalBasisClient legalBasisClient;
@Autowired
private StorageService storageService;
@Autowired
private ObjectMapper objectMapper;
@Configuration
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
public static class TestConfiguration {
// Identifiers used for the storage id and passed to parseDocument in these tests.
private static final String TEST_DOSSIER_ID = "123";
private static final String TEST_FILE_ID = "123";
/**
 * Spring test configuration for this test class.
 * <p>
 * Auto-configuration is enabled with {@code RabbitAutoConfiguration} and
 * {@code StorageAutoConfiguration} excluded; the {@link StorageService} bean is supplied by
 * this class instead.
 */
@Configuration
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
public static class TestConfiguration {
/**
 * Provides the primary {@link StorageService} for the test context.
 * <p>
 * NOTE(review): the bean method is called {@code inmemoryStorage} but returns a
 * {@code FileSystemBackedStorageService}; confirm which name reflects the intent.
 */
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
@ -85,6 +103,7 @@ public class PdfSegmentationServiceTest {
@SneakyThrows
public void testMapping() {
prepareStorage();
ClassPathResource responseJson = new ClassPathResource("files/image_response.json");
ImageServiceResponse imageServiceResponse = objectMapper.readValue(responseJson.getInputStream(), ImageServiceResponse.class);
@ -103,9 +122,10 @@ public class PdfSegmentationServiceTest {
@Test
public void testPDFSegmentationWithComplexTable() throws IOException {
prepareStorage();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
assertThat(document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
@ -124,9 +144,10 @@ public class PdfSegmentationServiceTest {
@Test
public void testTableExtraction() throws IOException {
prepareStorage();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
assertThat(document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
@ -162,9 +183,10 @@ public class PdfSegmentationServiceTest {
@Test
public void testMultiPageMetadataPropagation() throws IOException {
prepareStorage();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
assertThat(document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
@ -200,9 +222,10 @@ public class PdfSegmentationServiceTest {
@Test
public void testHeaderCellsForRotatedTable() throws IOException {
prepareStorage();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream(), null);
Document document = pdfSegmentationService.parseDocument(TEST_DOSSIER_ID, TEST_FILE_ID, pdfFileResource.getInputStream(), null);
assertThat(document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
@ -235,4 +258,10 @@ public class PdfSegmentationServiceTest {
}
/**
 * Stores the classpath fixture {@code files/cv_service_empty_response.json} under the
 * {@code FileType.TABLES} storage id of the test dossier and file.
 */
@SneakyThrows
private void prepareStorage() {

    ClassPathResource cvServiceResponse = new ClassPathResource("files/cv_service_empty_response.json");

    storageService.storeObject(
            RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.TABLES),
            cvServiceResponse.getInputStream());
}
}

View File

@ -0,0 +1,8 @@
{
"dossierId": "123",
"fileId": "123",
"operation": "table",
"targetFileExtension": "ORIGIN.pdf.gz",
"responseFileExtension": "TABLES.json.gz",
"data": []
}