Pull request #277: Bugfix/RED-2756
Merge in RED/redaction-service from bugfix/RED-2756 to master * commit '9d0fafd63ddb23fd8ee2f154e02df88687e08f96': RED-2756 Bugfix: redactions are not continuous. RED-2756 Bugfix for 'Redaction is not continuous': compare line height and y position instead of rounding y values.
This commit is contained in:
commit
f5817204bf
@ -1,5 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
@ -10,12 +20,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionS
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -24,7 +30,8 @@ public class RedactionLogCreatorService {
|
||||
private final DictionaryService dictionaryService;
|
||||
|
||||
|
||||
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages, String dossierTemplateId) {
|
||||
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages,
|
||||
String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> entries = new ArrayList<>();
|
||||
|
||||
@ -130,13 +137,19 @@ public class RedactionLogCreatorService {
|
||||
rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle());
|
||||
} else {
|
||||
float y = textPositions.get(0).getYDirAdj();
|
||||
float height = textPositions.get(0).getHeightDir();
|
||||
int startIndex = 0;
|
||||
|
||||
for (int i = 1; i < textPositions.size(); i++) {
|
||||
float yDirAdj = textPositions.get(i).getYDirAdj();
|
||||
if (round(yDirAdj,3) != round(y, 3)) {
|
||||
float heightDir = textPositions.get(i).getHeightDir();
|
||||
|
||||
if (!isCharInSameLine(y, yDirAdj, height, heightDir)) {
|
||||
|
||||
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page)
|
||||
.getRectangle());
|
||||
y = yDirAdj;
|
||||
height = heightDir;
|
||||
startIndex = i;
|
||||
}
|
||||
}
|
||||
@ -149,9 +162,21 @@ public class RedactionLogCreatorService {
|
||||
return rectangles;
|
||||
}
|
||||
|
||||
private double round(float value, int decimalPoints) {
|
||||
var d = Math.pow(10, decimalPoints);
|
||||
return Math.round(value * d) / d;
|
||||
|
||||
private boolean isCharInSameLine(float y, float yCompare, float height, float heightCompare) {
|
||||
|
||||
float offsetHeight = heightCompare / 5;
|
||||
float minHeight = heightCompare - offsetHeight;
|
||||
float maxHeight = heightCompare + offsetHeight;
|
||||
|
||||
float offsetY = heightCompare / 22;
|
||||
float minY = y - offsetY;
|
||||
float maxY = y + offsetY;
|
||||
|
||||
if (yCompare > minY && yCompare < maxY && height > minHeight && height < maxHeight) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -204,5 +229,4 @@ public class RedactionLogCreatorService {
|
||||
return dictionaryService.isRecommendation(type, dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,30 +1,27 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.*;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
@ -51,15 +48,47 @@ import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactionEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@ -631,29 +660,14 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private List<File> getPathsRecursively(File path) {
|
||||
|
||||
List<File> result = new ArrayList<>();
|
||||
if (path == null || path.listFiles() == null) {
|
||||
return result;
|
||||
}
|
||||
for (File f : path.listFiles()) {
|
||||
if (f.isFile()) {
|
||||
result.add(f);
|
||||
} else {
|
||||
result.addAll(getPathsRecursively(f));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
public void redactionTestSeparatedRedaction() throws IOException {
|
||||
|
||||
String fileName = "scanned/VV-380943_page38.pdf";
|
||||
String outputFileName = getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/VV-919901.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
@ -744,7 +758,124 @@ public class RedactionIntegrationTest {
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
deleted.remove("mouse");
|
||||
reanlysisVersions.put("mouse", 4L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
|
||||
analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
System.out.println("hi");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
String fileName = "files/new/VV-919901.pdf";
|
||||
String outputFileName = getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
request.setExcludedPages(Set.of(1));
|
||||
|
||||
request.setFileAttributes(List.of(FileAttribute.builder()
|
||||
.id("fileAttributeId")
|
||||
.label("Vertebrate Study")
|
||||
.placeholder("{fileattributes.vertebrateStudy}")
|
||||
.value("true")
|
||||
.build()));
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("first analysis duration: " + (end - start));
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Test.json")) {
|
||||
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID)));
|
||||
}
|
||||
|
||||
int correctFound = 0;
|
||||
loop:
|
||||
for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
if (redactionLogEntry.isImage()) {
|
||||
correctFound++;
|
||||
continue loop;
|
||||
}
|
||||
if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
|
||||
String value = sectionText.getText()
|
||||
.substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
|
||||
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
|
||||
correctFound++;
|
||||
} else {
|
||||
throw new RuntimeException("WTF");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
|
||||
|
||||
dictionary.get(AUTHOR).add("properties");
|
||||
reanlysisVersions.put("properties", 1L);
|
||||
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
deleted.add("David Chubb");
|
||||
deleted.add("mouse");
|
||||
|
||||
dictionary.get(FALSE_POSITIVE).add("David Chubb");
|
||||
reanlysisVersions.put("David Chubb", 3L);
|
||||
|
||||
reanlysisVersions.put("mouse", 3L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
ManualRedactions manualRedactions = new ManualRedactions();
|
||||
|
||||
manualRedactions.setImageRecategorization(Set.of(ManualImageRecategorization.builder()
|
||||
.annotationId("37eee3e9d589a5cc529bfec38c3ba479")
|
||||
.fileId("fileId")
|
||||
.status(AnnotationStatus.APPROVED)
|
||||
.type("signature")
|
||||
.build()));
|
||||
|
||||
request.setManualRedactions(manualRedactions);
|
||||
|
||||
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
|
||||
|
||||
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (end - start));
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
@ -1193,6 +1324,24 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private List<File> getPathsRecursively(File path) {
|
||||
|
||||
List<File> result = new ArrayList<>();
|
||||
if (path == null || path.listFiles() == null) {
|
||||
return result;
|
||||
}
|
||||
for (File f : path.listFiles()) {
|
||||
if (f.isFile()) {
|
||||
result.add(f);
|
||||
} else {
|
||||
result.addAll(getPathsRecursively(f));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static String getTemporaryDirectory() {
|
||||
|
||||
String tmpdir = System.getProperty("java.io.tmpdir");
|
||||
|
||||
@ -1,3 +1,28 @@
|
||||
AD Hurt
|
||||
N Pengelly
|
||||
HA J Napper
|
||||
E M Roper
|
||||
Earl M
|
||||
Weissler M S
|
||||
Warrinton J S
|
||||
Kuet SF
|
||||
Hadeld ST
|
||||
Butters C A
|
||||
Hurt AD
|
||||
Campbell AJ
|
||||
Runnalls JK
|
||||
Tummon O J
|
||||
Chapman PF
|
||||
Snell RJ
|
||||
MclIndoe EC
|
||||
Johnson R I
|
||||
Richard Andrews
|
||||
James Paul
|
||||
Walter Richard Andrews
|
||||
Wilbur H. Palmer
|
||||
Jeff Mueller
|
||||
James McDonelI
|
||||
Jeffrey S. Heither
|
||||
1 Braid S.and Tsui G
|
||||
1 Schwader A.L.
|
||||
2 Lee MR
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user