Merge branch 'DM-357' into 'master'

Resolve DM-357

Closes DM-357

See merge request redactmanager/redaction-service!92
This commit is contained in:
Kilian Schüttler 2023-08-16 12:36:04 +02:00
commit 5311612295
8 changed files with 56 additions and 213 deletions

13
publish-custom-image.sh Normal file → Executable file
View File

@ -2,7 +2,14 @@
# Name of the current working directory (path stripped); used below as the image name prefix.
dir=${PWD##*/}
gradle assemble
# Build number is taken from the first script argument and defaults to 1.
buildNumber=${1:-1}
# Get the current Git branch
# NOTE(review): on a detached checkout this prints the literal "HEAD", and a branch name
# containing "/" would be passed unchanged into the image version below - confirm both are acceptable.
branch=$(git rev-parse --abbrev-ref HEAD)
# Build the image with the version set to <user>-<buildNumber>; --publishImage is passed to the build.
gradle bootBuildImage --cleanCache --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$USER-$buildNumber
echo "nexus.knecon.com:5001/red/${dir}-server-v1:$USER-$buildNumber"
# Get the short commit hash (at least 5 characters; git lengthens it when 5 are not unique)
commit_hash=$(git rev-parse --short=5 HEAD)
# Combine branch and commit hash
buildName="${branch}-${commit_hash}"
# Build the image with the version set to <branch>-<commit hash>; --publishImage is passed to the build.
gradle bootBuildImage --cleanCache --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName
echo "nexus.knecon.com:5001/red/${dir}-server-v1:$buildName"

View File

@ -23,7 +23,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResu
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -37,7 +36,6 @@ public class RedactionMessageReceiver {
private final ObjectMapper objectMapper;
private final AnalyzeService analyzeService;
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@SneakyThrows
@ -98,19 +96,6 @@ public class RedactionMessageReceiver {
log.info("----------------------------------------------------------------------------------");
break;
case SURROUNDING_TEXT:
log.info("------------------------------Add surrounding Text--------------------------------");
log.info("Starting surrounding text search for entries {} ", analyzeRequest.getManualRedactions().getEntriesToAdd());
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
analyzeRequest.getManualRedactions());
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {} s",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
format("%.2f", result.getDuration() / 1000.0));
log.info("----------------------------------------------------------------------------------");
break;
default:
throw new IllegalArgumentException("Unknown MessageType: " + analyzeRequest.getMessageType());
}

View File

@ -1,122 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.document.data.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Enriches manually added redactions with the text found directly before and after them.
 *
 * <p>The document graph is loaded from storage, each pending entry is matched against the
 * top-level nodes of the document by position, and the surrounding text of the matching
 * occurrence is written back onto the entry.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ManualRedactionSurroundingTextService {

    private final RedactionStorageService redactionStorageService;
    private final EntityEnrichmentService entityEnrichmentService;


    /**
     * Sets {@code textBefore} / {@code textAfter} on every entry of
     * {@code manualRedactions.getEntriesToAdd()} that is located inside one of the document's
     * direct child nodes.
     *
     * <p>Matched entries are taken out of the pending collection while iterating and appended
     * again at the end; entries that match no node stay in the collection untouched.
     *
     * @param dossierId        dossier the file belongs to
     * @param fileId           file whose stored document data is loaded
     * @param manualRedactions manual redactions whose entries-to-add are enriched in place
     * @return result carrying the same {@code manualRedactions} instance and the elapsed time in milliseconds
     * @throws NotFoundException if an entry lies inside a node but no occurrence of its value matches its positions
     */
    @Timed("redactmanager_surroundingTextAnalysis")
    public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {

        long startTime = System.currentTimeMillis();
        Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(dossierId, fileId));

        List<ManualRedactionEntry> enrichedEntries = new ArrayList<>();
        for (SemanticNode topLevelNode : document.streamChildren().toList()) {
            // Nothing left to place: no need to look at the remaining nodes.
            if (manualRedactions.getEntriesToAdd().isEmpty()) {
                break;
            }
            for (var pending = manualRedactions.getEntriesToAdd().iterator(); pending.hasNext(); ) {
                var entry = pending.next();
                if (!sectionContainsEntry(topLevelNode, entry.getPositions())) {
                    continue;
                }
                Pair<String, String> beforeAndAfter = findSurroundingText(topLevelNode, entry.getValue(), entry.getPositions());
                entry.setTextBefore(beforeAndAfter.getLeft());
                entry.setTextAfter(beforeAndAfter.getRight());
                enrichedEntries.add(entry);
                // Removing through the iterator keeps the entry from being matched against later nodes.
                pending.remove();
            }
        }
        manualRedactions.getEntriesToAdd().addAll(enrichedEntries);

        return AnalyzeResult.builder()
                .dossierId(dossierId)
                .fileId(fileId)
                .manualRedactions(manualRedactions)
                .duration(System.currentTimeMillis() - startTime)
                .build();
    }


    /**
     * Searches {@code node}'s text block for every occurrence of {@code value}, turns each
     * occurrence into a helper entity and returns the surrounding text of the occurrence that
     * matches the requested positions.
     */
    private Pair<String, String> findSurroundingText(SemanticNode node, String value, List<Rectangle> toFindPositions) {

        EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
        Set<RedactionEntity> candidates = RedactionSearchUtility.findBoundariesByString(value, node.getTextBlock())
                .stream()
                .map(occurrence -> entityCreationService.forceByBoundary(occurrence, "searchHelper", EntityType.RECOMMENDATION, node))
                .collect(Collectors.toSet());

        RedactionEntity match = getEntityOnCorrectPosition(candidates, toFindPositions);
        return Pair.of(match.getTextBefore(), match.getTextAfter());
    }


    /**
     * Returns {@code true} as soon as the node reports containing one of the given rectangles on
     * that rectangle's page.
     */
    private boolean sectionContainsEntry(SemanticNode semanticNode, List<Rectangle> positions) {

        return positions.stream().anyMatch(position -> semanticNode.containsRectangle(toRectangle2D(position), position.getPage()));
    }


    /**
     * Picks the first candidate whose per-line rectangles all intersect at least one of the
     * requested positions.
     *
     * @throws NotFoundException if no candidate qualifies
     */
    private RedactionEntity getEntityOnCorrectPosition(Set<RedactionEntity> entities, List<Rectangle> toFindPositions) {

        return entities.stream()
                .filter(candidate -> everyLineIntersectsRequestedPositions(candidate, toFindPositions))
                .findFirst()
                .orElseThrow(() -> new NotFoundException("No matching Entity could be found for positions" + toFindPositions));
    }


    /**
     * Checks every per-line rectangle of the entity against the requested positions; an entity
     * without any line rectangle passes.
     */
    private static boolean everyLineIntersectsRequestedPositions(RedactionEntity entity, List<Rectangle> toFindPositions) {

        for (RedactionPosition positionOnPage : entity.getRedactionPositionsPerPage()) {
            for (Rectangle2D lineRectangle : positionOnPage.getRectanglePerLine()) {
                if (!toFindPositionsIntersectRectangle(toFindPositions, lineRectangle)) {
                    return false;
                }
            }
        }
        return true;
    }


    /** Returns whether any requested position, converted to a {@link Rectangle2D}, intersects the given rectangle. */
    private static boolean toFindPositionsIntersectRectangle(List<Rectangle> toFindPositions, Rectangle2D rectangle2D) {

        for (Rectangle requested : toFindPositions) {
            if (toRectangle2D(requested).intersects(rectangle2D)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Converts a persisted rectangle into a {@link Rectangle2D} whose origin is
     * {@code (topLeftX - width, topLeftY - height)} and whose size is unchanged.
     */
    // NOTE(review): shifting x by the full width looks unusual next to the y shift - confirm against the coordinate convention of Rectangle.
    private static Rectangle2D toRectangle2D(Rectangle rect) {

        double originX = rect.getTopLeftX() - rect.getWidth();
        double originY = rect.getTopLeftY() - rect.getHeight();
        return new Rectangle2D.Double(originX, originY, rect.getWidth(), rect.getHeight());
    }

}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
@ -20,6 +21,8 @@ import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityIdentifier;
import com.iqser.red.service.redaction.v1.server.redaction.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -114,8 +117,10 @@ public class RedactionLogCreatorService {
}
public RedactionLogEntry createRedactionLogEntry(EntityIdentifier entityIdentifier, String dossierTemplateId) {
List<Integer> pageNumbers = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::pageNumber).toList();
List<Rectangle2D> rectanglesPerLine = entityIdentifier.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList();
return RedactionLogEntry.builder()
.id(IdBuilder.buildId(pageNumbers, rectanglesPerLine, entityIdentifier.getType(), entityIdentifier.getEntityType().name()))
.color(getColor(entityIdentifier.getType(), dossierTemplateId, entityIdentifier.isApplied()))
.reason(entityIdentifier.getReason())
.legalBasis(entityIdentifier.getLegalBasis())

View File

@ -1,6 +1,9 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -9,14 +12,17 @@ import org.springframework.stereotype.Component;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
@ -38,10 +44,9 @@ class SectionFinderService {
long start = System.currentTimeMillis();
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
Set<Integer> sectionsToReanalyse = new HashSet<>();
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
if (entry.isLocalManualRedaction() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
if (relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
sectionsToReanalyse.add(entry.getSectionNumber());
}
}
@ -55,12 +60,45 @@ class SectionFinderService {
}
});
Set<Integer> relevantPagesForReanalysis = getRelevantPageNumbersForAddRedactions(analyzeRequest);
if (!relevantPagesForReanalysis.isEmpty()) {
sectionsToReanalyse.addAll(getSectionNumbersOnPages(document, relevantPagesForReanalysis));
}
log.debug("Took: {} milliseconds to find sections to reanalyze", System.currentTimeMillis() - start);
return sectionsToReanalyse;
}
/**
 * Collects the first tree-id component of every node that has to be reanalysed on the given pages.
 *
 * <p>For each page whose number is contained in {@code relevantPagesForReanalysis} this covers
 * the main-body nodes of type {@link NodeType#SECTION} plus the page's header and footer.
 * The result may contain the same number more than once.
 *
 * @param document                   document graph whose pages are inspected
 * @param relevantPagesForReanalysis page numbers to look at
 * @return first tree-id component of each selected node, in encounter order
 */
private static List<Integer> getSectionNumbersOnPages(Document document, Set<Integer> relevantPagesForReanalysis) {

    var affectedPages = document.getPages()
            .stream()
            .filter(candidatePage -> relevantPagesForReanalysis.contains(candidatePage.getNumber()));

    // Per page: the SECTION nodes of the main body first, then header and footer.
    // NOTE(review): assumes every page has a non-null header and footer - confirm.
    var nodesOnAffectedPages = affectedPages.flatMap(affectedPage -> Stream.concat(
            affectedPage.getMainBody().stream().filter(bodyNode -> bodyNode.getType().equals(NodeType.SECTION)),
            Stream.of(affectedPage.getHeader(), affectedPage.getFooter())));

    return nodesOnAffectedPages.map(selectedNode -> selectedNode.getTreeId().get(0)).toList();
}
/**
 * Determines the pages touched by manually added redactions that are not dictionary additions.
 *
 * <p>Entries flagged as add-to-dictionary or add-to-dossier-dictionary are ignored; for every
 * other entry the page of each of its positions is collected.
 *
 * @param analyzeRequest request whose manual redactions are inspected
 * @return the distinct page numbers, or an empty set when the request carries no manual redactions
 */
private static Set<Integer> getRelevantPageNumbersForAddRedactions(AnalyzeRequest analyzeRequest) {

    ManualRedactions manualRedactions = analyzeRequest.getManualRedactions();
    if (manualRedactions == null) {
        return Collections.emptySet();
    }

    Set<Integer> pageNumbers = new HashSet<>();
    for (ManualRedactionEntry addRedaction : manualRedactions.getEntriesToAdd()) {
        if (addRedaction.isAddToDictionary() || addRedaction.isAddToDossierDictionary()) {
            continue;
        }
        for (Rectangle position : addRedaction.getPositions()) {
            pageNumbers.add(position.getPage());
        }
    }
    return pageNumbers;
}
private static Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
if (manualRedactions == null) {

View File

@ -36,7 +36,6 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -113,9 +112,6 @@ public abstract class AbstractRedactionIntegrationTest {
@Autowired
protected StorageService storageService;
@Autowired
protected ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@Autowired
private LayoutParsingPipeline layoutParsingPipeline;

View File

@ -1187,69 +1187,6 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
}
/**
 * Disabled end-to-end check of the surrounding-text lookup: three approved manual
 * {@code CBI_author} entries are added on page 1 of {@code files/new/S4.pdf}, the file is
 * analysed and annotated (the annotated PDF is written to the temporary directory), and
 * afterwards every entry is expected to carry a non-empty text-after.
 */
@Test
@Disabled
public void testManualSurroundingText() throws IOException {

    String pdfFile = "files/new/S4.pdf";

    ManualRedactions manualRedactions = new ManualRedactions();
    manualRedactions.getEntriesToAdd()
            .add(approvedAuthorEntry("rabbits", Rectangle.builder().topLeftX(70.944f).topLeftY(670.1595f).width(30.07296f).height(10.048125f).page(1).build()));
    manualRedactions.getEntriesToAdd()
            .add(approvedAuthorEntry("rabbits", Rectangle.builder().topLeftX(470.5204f).topLeftY(746.1195f).width(29.96256f).height(10.048125f).page(1).build()));
    manualRedactions.getEntriesToAdd()
            .add(approvedAuthorEntry("AOEL", Rectangle.builder().topLeftX(355.53775f).topLeftY(266.1895f).width(29.32224f).height(10.048125f).page(1).build()));

    AnalyzeRequest request = uploadFileToStorage(pdfFile);
    request.setManualRedactions(manualRedactions);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    // The analysis result itself is not inspected; the call is made for its effects only.
    analyzeService.analyze(request);

    AnnotateRequest annotateRequest = AnnotateRequest.builder()
            .dossierId(TEST_DOSSIER_ID)
            .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
            .fileId(TEST_FILE_ID)
            .manualRedactions(manualRedactions)
            .build();
    AnnotateResponse annotateResponse = annotationService.annotate(annotateRequest);

    try (FileOutputStream annotatedPdf = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
        annotatedPdf.write(annotateResponse.getDocument());
    }

    var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions).getManualRedactions();
    for (var addEntry : surroundingTextResult.getEntriesToAdd()) {
        assertThat(addEntry.getTextAfter()).isNotEmpty();
    }
}


/** Builds an approved manual {@code CBI_author} redaction for {@code value} at the single given position. */
private static ManualRedactionEntry approvedAuthorEntry(String value, Rectangle position) {

    ManualRedactionEntry entry = new ManualRedactionEntry();
    entry.setAnnotationId(UUID.randomUUID().toString());
    entry.setFileId("fileId");
    entry.setStatus(AnnotationStatus.APPROVED);
    entry.setType("CBI_author");
    entry.setValue(value);
    entry.setReason("Manual Redaction");
    entry.setPositions(List.of(position));
    return entry;
}
@Test
public void testImportedRedactions() throws IOException {

View File

@ -77,7 +77,6 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -244,8 +243,6 @@ public class RulesTest {
private LayoutParsingPipeline layoutParsingPipeline;
@Autowired
private StorageService storageService;
@Autowired
private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService;
@MockBean
private AmazonS3 amazonS3;
@MockBean