Pull request #282: Bugfix/RED-2756 2.52.X

Merge in RED/redaction-service from bugfix/RED-2756_2.52.X to release/2.52.x

* commit '8a763edc2bc53d2e61efd7155b7af53272e114ef':
  RED-2756 Bugfix for temporary files for Windows systems
  RED-2756 Bugfix for 'Redaction is not continuous', compare line height and y position instead of rounding y values
This commit is contained in:
Philipp Schramm 2021-11-30 13:21:34 +01:00 committed by Dominique Eiflaender
commit bea9abc30c
2 changed files with 78 additions and 37 deletions

View File

@ -1,5 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.CellRectangle;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
@ -17,16 +27,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
@ -54,7 +56,8 @@ public class RedactionLogCreatorService {
}
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber, String dossierTemplateId) {
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber,
String dossierTemplateId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
@ -81,11 +84,10 @@ public class RedactionLogCreatorService {
.sectionNumber(image.getSectionNumber())
//.section(image.getSection())
// RED-2622
.section("Image:"+image.getType())
.section("Image:" + image.getType())
.imageHasTransparency(image.isHasTransparency())
.build();
redactionLogEntities.add(redactionLogEntry);
}
@ -103,7 +105,6 @@ public class RedactionLogCreatorService {
entityLoop:
for (Entity entity : entities.get(page)) {
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, dossierTemplateId);
@ -123,12 +124,10 @@ public class RedactionLogCreatorService {
.flatMap(seq -> seq.getTextPositions().stream())
.collect(Collectors.toList()), page);
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
}
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
redactionLogEntities.add(redactionLogEntry);
@ -146,16 +145,27 @@ public class RedactionLogCreatorService {
if (textPositions.size() == 1) {
rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle());
} else {
float x = textPositions.get(0).getXDirAdj();
float y = textPositions.get(0).getYDirAdj();
float width = textPositions.get(0).getWidth();
float height = textPositions.get(0).getHeightDir();
int startIndex = 0;
for (int i = 1; i < textPositions.size(); i++) {
float xDirAdj = textPositions.get(i).getXDirAdj();
float yDirAdj = textPositions.get(i).getYDirAdj();
if (round(yDirAdj,3) != round(y, 3)) {
float widthDir = textPositions.get(i).getWidth();
float heightDir = textPositions.get(i).getHeightDir();
if (!(isCharInSameLine(y, yDirAdj, height, heightDir) && isCharClose(x, xDirAdj, width))) {
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page)
.getRectangle());
y = yDirAdj;
width = widthDir;
height = heightDir;
startIndex = i;
}
x = xDirAdj;
}
if (startIndex != textPositions.size()) {
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page)
@ -166,9 +176,32 @@ public class RedactionLogCreatorService {
return rectangles;
}
private double round(float value, int decimalPoints) {
var d = Math.pow(10, decimalPoints);
return Math.round(value * d) / d;
/**
 * Checks whether a character's x position lies before a horizontal limit derived
 * from a reference position.
 *
 * <p>The limit is {@code x + 5 * width}. Only this upper bound is tested, so any
 * {@code xDirAdj} smaller than the limit (including values left of {@code x}) is accepted.
 *
 * @param x       reference x position
 * @param xDirAdj x position of the character being compared
 * @param width   reference width used to scale the allowed distance
 * @return {@code true} if {@code xDirAdj} is strictly less than {@code x + 5 * width}
 */
private boolean isCharClose(float x, float xDirAdj, float width) {

    float rightLimit = x + (5 * width);
    return xDirAdj < rightLimit;
}
/**
 * Decides whether a character matches a reference character in both vertical
 * position and height, within fixed tolerances.
 *
 * <p>Both tolerances are derived from the reference {@code height}:
 * <ul>
 *   <li>{@code yCompare} must lie strictly between {@code y - height / 10} and
 *       {@code y + height / 10};</li>
 *   <li>{@code heightCompare} must lie strictly between {@code height - height / 2}
 *       and {@code height + height / 2}.</li>
 * </ul>
 *
 * @param y             reference y position
 * @param yCompare      y position of the character being compared
 * @param height        reference height; also scales both tolerances
 * @param heightCompare height of the character being compared
 * @return {@code true} only if both the y position and the height fall inside their windows
 */
private boolean isCharInSameLine(float y, float yCompare, float height, float heightCompare) {

    // Height window: +/- 50% of the reference height, bounds exclusive.
    float heightTolerance = height / 2;
    boolean heightMatches = heightCompare > height - heightTolerance
            && heightCompare < height + heightTolerance;

    // Vertical window: +/- 10% of the reference height around y, bounds exclusive.
    float yTolerance = height / 10;
    boolean yMatches = yCompare > y - yTolerance
            && yCompare < y + yTolerance;

    return yMatches && heightMatches;
}
@ -213,8 +246,8 @@ public class RedactionLogCreatorService {
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock.getHeight(), i + 1, paragraph.getPageBlocks()
.size()));
} else if (textBlock instanceof Table) {
@ -222,8 +255,7 @@ public class RedactionLogCreatorService {
for (List<Cell> row : ((Table) textBlock).getRows()) {
for (Cell cell : row) {
if (cell != null) {
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight()));
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight()));
}
}
}
@ -231,8 +263,8 @@ public class RedactionLogCreatorService {
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock.getHeight(), i + 1, paragraph.getPageBlocks()
.size(), cellRectangles));
}
}
@ -260,5 +292,4 @@ public class RedactionLogCreatorService {
return dictionaryService.isRecommendation(type, dossierTemplateId);
}
}

View File

@ -22,6 +22,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
@ -569,7 +570,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated3.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated3.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
long rstart = System.currentTimeMillis();
@ -682,7 +683,7 @@ public class RedactionIntegrationTest {
System.out.println("first analysis duration: " + (end - start));
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Test.json")) {
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID)));
}
@ -750,7 +751,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
@ -771,7 +772,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
long end = System.currentTimeMillis();
@ -847,7 +848,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
long end = System.currentTimeMillis();
@ -873,7 +874,7 @@ public class RedactionIntegrationTest {
RedactionResult result = redactionController.classify(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Classified.pdf")) {
fileOutputStream.write(result.getDocument());
}
}
@ -895,7 +896,7 @@ public class RedactionIntegrationTest {
RedactionResult result = redactionController.sections(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Sections.pdf")) {
fileOutputStream.write(result.getDocument());
}
}
@ -917,7 +918,7 @@ public class RedactionIntegrationTest {
RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Tables.html")) {
fileOutputStream.write(result.getDocument());
}
}
@ -939,7 +940,7 @@ public class RedactionIntegrationTest {
RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Tables.html")) {
fileOutputStream.write(result.getDocument());
}
}
@ -1007,7 +1008,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
long end = System.currentTimeMillis();
@ -1031,7 +1032,7 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
long end = System.currentTimeMillis();
@ -1059,4 +1060,13 @@ public class RedactionIntegrationTest {
}
}
/**
 * Resolves the directory used for files written by these tests.
 *
 * <p>Reads the {@code java.io.tmpdir} system property and returns it unless it is
 * blank, in which case {@code "/tmp"} is returned.
 *
 * @return the value of {@code java.io.tmpdir}, or {@code "/tmp"} when that value is blank
 */
private static String getTemporaryDirectory() {

    String configuredDir = System.getProperty("java.io.tmpdir");
    return StringUtils.isNotBlank(configuredDir) ? configuredDir : "/tmp";
}
}