From b45432d9cdd9c18bed3ac822ca732525a0414709 Mon Sep 17 00:00:00 2001 From: Philipp Schramm Date: Tue, 30 Nov 2021 12:03:17 +0100 Subject: [PATCH 1/2] RED-2756 Bugfix for 'Redaction is not continuous', compare line height and y position instead of rounding y values --- .../service/RedactionLogCreatorService.java | 83 +++++++++++++------ 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index de13e302..46ba56f4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,5 +1,15 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.stereotype.Service; + import com.iqser.red.service.redaction.v1.model.CellRectangle; import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; @@ -17,16 +27,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; -import lombok.RequiredArgsConstructor; -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; @Service @RequiredArgsConstructor @@ -54,7 +56,8 @@ public class RedactionLogCreatorService { } - public List addImageEntries(Map> images, int pageNumber, String dossierTemplateId) { + public List addImageEntries(Map> images, int pageNumber, + String dossierTemplateId) { List redactionLogEntities = new ArrayList<>(); @@ -81,11 +84,10 @@ public class RedactionLogCreatorService { .sectionNumber(image.getSectionNumber()) //.section(image.getSection()) // RED-2622 - .section("Image:"+image.getType()) + .section("Image:" + image.getType()) .imageHasTransparency(image.isHasTransparency()) .build(); - redactionLogEntities.add(redactionLogEntry); } @@ -103,7 +105,6 @@ public class RedactionLogCreatorService { entityLoop: for (Entity entity : entities.get(page)) { - for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) { RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, dossierTemplateId); @@ -123,12 +124,10 @@ public class RedactionLogCreatorService { .flatMap(seq -> seq.getTextPositions().stream()) .collect(Collectors.toList()), page); - redactionLogEntry.getPositions().addAll(rectanglesPerLine); } - // FIXME ids should never be null. Figure out why this happens. if (redactionLogEntry.getId() != null) { redactionLogEntities.add(redactionLogEntry); @@ -146,16 +145,27 @@ public class RedactionLogCreatorService { if (textPositions.size() == 1) { rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle()); } else { + float x = textPositions.get(0).getXDirAdj(); float y = textPositions.get(0).getYDirAdj(); + float width = textPositions.get(0).getWidth(); + float height = textPositions.get(0).getHeightDir(); int startIndex = 0; + for (int i = 1; i < textPositions.size(); i++) { + float xDirAdj = textPositions.get(i).getXDirAdj(); float yDirAdj = textPositions.get(i).getYDirAdj(); - if (round(yDirAdj,3) != round(y, 3)) { + float widthDir = textPositions.get(i).getWidth(); + float heightDir = textPositions.get(i).getHeightDir(); + + if (!(isCharInSameLine(y, yDirAdj, height, heightDir) && isCharClose(x, xDirAdj, width))) { rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) .getRectangle()); y = yDirAdj; + width = widthDir; + height = heightDir; startIndex = i; } + x = xDirAdj; } if (startIndex != textPositions.size()) { rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page) @@ -166,9 +176,32 @@ public class RedactionLogCreatorService { return rectangles; } - private double round(float value, int decimalPoints) { - var d = Math.pow(10, decimalPoints); - return Math.round(value * d) / d; + + private boolean isCharClose(float x, float xDirAdj, float width) { + + float max = x + (5 * width); + if (xDirAdj < max) { + return true; + } + return false; + } + + + private boolean isCharInSameLine(float y, float yCompare, float height, float heightCompare) { + + float offsetHeight = height / 2; + float minHeight = height - offsetHeight; + float maxHeight = height + offsetHeight; + + float offsetY = height / 10; + float minY = y - offsetY; + float maxY = y + offsetY; + + if (yCompare > minY && yCompare < maxY && heightCompare > minHeight && heightCompare < maxHeight) { + return true; + } + + return false; } @@ -213,8 +246,8 @@ public class RedactionLogCreatorService { classifiedDoc.getSectionGrid() .getRectanglesPerPage() .computeIfAbsent(page, (x) -> new ArrayList<>()) - .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock - .getHeight(), i + 1, paragraph.getPageBlocks().size())); + .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock.getHeight(), i + 1, paragraph.getPageBlocks() + .size())); } else if (textBlock instanceof Table) { @@ -222,8 +255,7 @@ public class RedactionLogCreatorService { for (List row : ((Table) textBlock).getRows()) { for (Cell cell : row) { if (cell != null) { - cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell - .getWidth(), (float) cell.getHeight())); + cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight())); } } } @@ -231,8 +263,8 @@ public class RedactionLogCreatorService { classifiedDoc.getSectionGrid() .getRectanglesPerPage() .computeIfAbsent(page, (x) -> new ArrayList<>()) - .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock - .getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles)); + .add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock.getHeight(), i + 1, paragraph.getPageBlocks() + .size(), cellRectangles)); } } @@ -260,5 +292,4 @@ public class RedactionLogCreatorService { return dictionaryService.isRecommendation(type, dossierTemplateId); } - } From 8a763edc2bc53d2e61efd7155b7af53272e114ef Mon Sep 17 00:00:00 2001 From: Philipp Schramm Date: Tue, 30 Nov 2021 12:03:46 +0100 Subject: [PATCH 2/2] RED-2756 Bugfix for temporary files for Windows systems --- .../v1/server/RedactionIntegrationTest.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index b88f67f1..deb6c1e6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -22,6 +22,7 @@ import com.iqser.red.storage.commons.service.StorageService; import lombok.SneakyThrows; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.junit.After; import org.junit.Before; import org.junit.Ignore; @@ -569,7 +570,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated3.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated3.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } long rstart = System.currentTimeMillis(); @@ -682,7 +683,7 @@ public class RedactionIntegrationTest { System.out.println("first analysis duration: " + (end - start)); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Test.json")) { fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID))); } @@ -750,7 +751,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } @@ -771,7 +772,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); @@ -847,7 +848,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); @@ -873,7 +874,7 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.classify(redactionRequest); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Classified.pdf")) { fileOutputStream.write(result.getDocument()); } } @@ -895,7 +896,7 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.sections(redactionRequest); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Sections.pdf")) { fileOutputStream.write(result.getDocument()); } } @@ -917,7 +918,7 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.htmlTables(redactionRequest); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Tables.html")) { fileOutputStream.write(result.getDocument()); } } @@ -939,7 +940,7 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.htmlTables(redactionRequest); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Tables.html")) { fileOutputStream.write(result.getDocument()); } } @@ -1007,7 +1008,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); @@ -1031,7 +1032,7 @@ public class RedactionIntegrationTest { .fileId(TEST_FILE_ID) .build()); - try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } long end = System.currentTimeMillis(); @@ -1059,4 +1060,13 @@ public class RedactionIntegrationTest { } } + private static String getTemporaryDirectory() { + + String tmpdir = System.getProperty("java.io.tmpdir"); + if (StringUtils.isNotBlank(tmpdir)) { + return tmpdir; + } + return "/tmp"; + } + }