RED-2756 Bugfix for redactions that are not continuous

This commit is contained in:
Philipp Schramm 2021-11-29 10:31:32 +01:00
parent 6230f42fcf
commit 9d0fafd63d
3 changed files with 227 additions and 53 deletions

View File

@ -1,30 +1,27 @@
package com.iqser.red.service.redaction.v1.server;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.*;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import lombok.SneakyThrows;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -51,15 +48,47 @@ import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@ -631,29 +660,14 @@ public class RedactionIntegrationTest {
}
/**
 * Collects every regular file found beneath {@code path}, descending into
 * sub-directories.
 *
 * @param path directory to walk; may be {@code null}
 * @return a new, mutable list of the files found; empty when {@code path} is
 *         {@code null} or when {@link File#listFiles()} yields {@code null}
 */
private List<File> getPathsRecursively(File path) {
    List<File> result = new ArrayList<>();
    if (path == null) {
        return result;
    }
    // List the directory exactly once. Calling listFiles() a second time in the
    // loop header could return null even though the guard saw a non-null array,
    // which would end in a NullPointerException.
    File[] children = path.listFiles();
    if (children == null) {
        return result;
    }
    for (File child : children) {
        if (child.isFile()) {
            result.add(child);
        } else {
            result.addAll(getPathsRecursively(child));
        }
    }
    return result;
}
@Test
public void redactionTest() throws IOException {
public void redactionTestSeparatedRedaction() throws IOException {
String fileName = "scanned/VV-380943_page38.pdf";
String outputFileName = getTemporaryDirectory() + "/AnnotatedRedactionTestSeparatedRedaction.pdf";
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/new/VV-919901.pdf");
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));
@ -744,7 +758,124 @@ public class RedactionIntegrationTest {
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) {
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
fileOutputStream.write(annotateResponse.getDocument());
}
deleted.remove("mouse");
reanlysisVersions.put("mouse", 4L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
System.out.println("hi");
}
@Test
public void redactionTest() throws IOException {
String fileName = "files/new/VV-919901.pdf";
String outputFileName = getTemporaryDirectory() + "/Annotated.pdf";
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource(fileName);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder()
.id("fileAttributeId")
.label("Vertebrate Study")
.placeholder("{fileattributes.vertebrateStudy}")
.value("true")
.build()));
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
long end = System.currentTimeMillis();
System.out.println("first analysis duration: " + (end - start));
try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Test.json")) {
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID)));
}
int correctFound = 0;
loop:
for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
for (SectionText sectionText : text.getSectionTexts()) {
if (redactionLogEntry.isImage()) {
correctFound++;
continue loop;
}
if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
String value = sectionText.getText()
.substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
correctFound++;
} else {
throw new RuntimeException("WTF");
}
}
}
}
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
dictionary.get(AUTHOR).add("physical");
reanlysisVersions.put("physical", 2L);
deleted.add("David Chubb");
deleted.add("mouse");
dictionary.get(FALSE_POSITIVE).add("David Chubb");
reanlysisVersions.put("David Chubb", 3L);
reanlysisVersions.put("mouse", 3L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
start = System.currentTimeMillis();
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.setImageRecategorization(Set.of(ManualImageRecategorization.builder()
.annotationId("37eee3e9d589a5cc529bfec38c3ba479")
.fileId("fileId")
.status(AnnotationStatus.APPROVED)
.type("signature")
.build()));
request.setManualRedactions(manualRedactions);
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
fileOutputStream.write(annotateResponse.getDocument());
}
@ -1193,6 +1324,24 @@ public class RedactionIntegrationTest {
}
/**
 * Collects every regular file found beneath {@code path}, descending into
 * sub-directories.
 *
 * @param path directory to walk; may be {@code null}
 * @return a new, mutable list of the files found; empty when {@code path} is
 *         {@code null} or when {@link File#listFiles()} yields {@code null}
 */
private List<File> getPathsRecursively(File path) {
    List<File> result = new ArrayList<>();
    if (path == null) {
        return result;
    }
    // List the directory exactly once. Calling listFiles() a second time in the
    // loop header could return null even though the guard saw a non-null array,
    // which would end in a NullPointerException.
    File[] children = path.listFiles();
    if (children == null) {
        return result;
    }
    for (File child : children) {
        if (child.isFile()) {
            result.add(child);
        } else {
            result.addAll(getPathsRecursively(child));
        }
    }
    return result;
}
private static String getTemporaryDirectory() {
String tmpdir = System.getProperty("java.io.tmpdir");

View File

@ -1,3 +1,28 @@
AD Hurt
N Pengelly
HA J Napper
E M Roper
Earl M
Weissler M S
Warrinton J S
Kuet SF
Hadeld ST
Butters C A
Hurt AD
Campbell AJ
Runnalls JK
Tummon O J
Chapman PF
Snell RJ
MclIndoe EC
Johnson R I
Richard Andrews
James Paul
Walter Richard Andrews
Wilbur H. Palmer
Jeff Mueller
James McDonelI
Jeffrey S. Heither
1 Braid S.and Tsui G
1 Schwader A.L.
2 Lee MR