diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml index fa22253a..0e570d9a 100644 --- a/redaction-service-v1/redaction-service-api-v1/pom.xml +++ b/redaction-service-v1/redaction-service-api-v1/pom.xml @@ -12,7 +12,7 @@ redaction-service-api-v1 - 0.121.0 + 0.135.0 diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java index 07e67c9f..eb71f6fb 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java @@ -30,4 +30,9 @@ public class SectionArea { return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight(); } + // TODO we should only use one rectangle class. 
+ public boolean contains(com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle other) { + return page == other.getPage() && this.topLeft.getX() <= other.getTopLeftX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeftX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeftY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeftY() + other.getHeight(); + } + } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java index d6bb66f9..b7557406 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java @@ -1,29 +1,43 @@ package com.iqser.red.service.redaction.v1.resources; +import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; import com.iqser.red.service.redaction.v1.model.*; + import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestParam; public interface RedactionResource { @PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest); + @PostMapping(value = "/debug/classifications", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) RedactionResult classify(@RequestBody RedactionRequest redactionRequest); + @PostMapping(value = "/debug/sections", produces = 
MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) RedactionResult sections(@RequestBody RedactionRequest redactionRequest); + @PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest); + @PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE) void testRules(@RequestBody String rules); + @PostMapping(value = "/redaction-log/preview", consumes = MediaType.APPLICATION_JSON_VALUE) RedactionLog getRedactionLog(@RequestBody RedactionRequest redactionRequest); + + @PostMapping(value = "/manual/surrounding-text/{dossierId}/{fileId}", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId, + @PathVariable("fileId") String fileId, + @RequestBody ManualRedactions manualRedactions); + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index 0c882390..c75eee3e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -1,7 +1,24 @@ package com.iqser.red.service.redaction.v1.server.controller; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.stream.Collectors; + +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.springframework.web.bind.annotation.PathVariable; +import 
org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RestController; + +import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.service.redaction.v1.model.*; +import com.iqser.red.service.redaction.v1.model.AnnotateRequest; +import com.iqser.red.service.redaction.v1.model.AnnotateResponse; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionRequest; +import com.iqser.red.service.redaction.v1.model.RedactionResult; +import com.iqser.red.service.redaction.v1.model.SectionArea; +import com.iqser.red.service.redaction.v1.model.SectionGrid; import com.iqser.red.service.redaction.v1.resources.RedactionResource; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; @@ -10,22 +27,16 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService; import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import 
com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.springframework.web.bind.annotation.RequestBody; -import org.springframework.web.bind.annotation.RestController; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.stream.Collectors; @Slf4j @RestController @@ -39,11 +50,19 @@ public class RedactionController implements RedactionResource { private final PdfSegmentationService pdfSegmentationService; private final RedactionStorageService redactionStorageService; private final RedactionLogMergeService redactionLogMergeService; + private final ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; + public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN)); - var mergedRedactionLog = getRedactionLog(RedactionRequest.builder().fileId(annotateRequest.getFileId()).manualRedactions(annotateRequest.getManualRedactions()).dossierId(annotateRequest.getDossierId()).dossierTemplateId(annotateRequest.getDossierTemplateId()).build()); + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest + .getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN)); + var mergedRedactionLog = getRedactionLog(RedactionRequest.builder() + .fileId(annotateRequest.getFileId()) + .manualRedactions(annotateRequest.getManualRedactions()) + .dossierId(annotateRequest.getDossierId()) + .dossierTemplateId(annotateRequest.getDossierTemplateId()) + .build()); var sectionsGrid = 
redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId()); try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) { @@ -65,11 +84,14 @@ public class RedactionController implements RedactionResource { @Override public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest + .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try { Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); - storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest + .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); @@ -85,16 +107,19 @@ public class RedactionController implements RedactionResource { throw new RedactionException(e); } - } + @Override public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest + .getDossierId(), redactionRequest.getFileId(), 
FileType.ORIGIN)); try { Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream); - storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest + .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); @@ -109,7 +134,6 @@ public class RedactionController implements RedactionResource { throw new RedactionException(e); } - } @@ -119,13 +143,13 @@ public class RedactionController implements RedactionResource { Document classifiedDoc; try { - var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest + .getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN)); classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, true); } catch (Exception e) { throw new RedactionException(e); } - StringBuilder sb = new StringBuilder(); for (Page page : classifiedDoc.getPages()) { for (AbstractTextContainer textContainer : page.getTextBlocks()) { @@ -140,12 +164,14 @@ public class RedactionController implements RedactionResource { } + @Override public void testRules(@RequestBody String rules) { droolsExecutionService.testRules(rules); } + @Override public RedactionLog getRedactionLog(RedactionRequest redactionRequest) { @@ -160,7 +186,8 @@ public class RedactionController implements RedactionResource { log.info("Loaded redaction log with computationalVersion: {}", 
redactionLog.getAnalysisVersion()); - SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest.getFileId()); + SectionGrid sectionGrid = redactionStorageService.getSectionGrid(redactionRequest.getDossierId(), redactionRequest + .getFileId()); if (sectionGrid.getSections().isEmpty()) { log.info("SectionGrid does not have headlines set. Computing headlines now!"); @@ -168,24 +195,27 @@ public class RedactionController implements RedactionResource { // enhance section grid with headline data for (var sectionText : text.getSectionTexts()) { - sectionGrid.getSections().add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), - sectionText.getHeadline(), - sectionText.getSectionAreas().stream().map(SectionArea::getPage).collect(Collectors.toSet()), - sectionText.getSectionAreas())); + sectionGrid.getSections() + .add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText + .getSectionAreas() + .stream() + .map(SectionArea::getPage) + .collect(Collectors.toSet()), sectionText.getSectionAreas())); } redactionStorageService.storeObject(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.SECTION_GRID, sectionGrid); } - log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion()); if (redactionLog.getAnalysisVersion() == 0) { // old redaction logs are returned directly return redactionLog; } else { - return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages()); + return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest + .getManualRedactions(), redactionRequest.getExcludedPages()); } } + private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { try 
(ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
@@ -199,4 +229,16 @@ public class RedactionController implements RedactionResource {
     }
 
 
+    @Override
+    public ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId,
+                                               @PathVariable("fileId") String fileId,
+                                               @RequestBody ManualRedactions manualRedactions) {
+
+        long start = System.currentTimeMillis();
+        var result = manualRedactionSurroundingTextService.addSurroundingText(dossierId, fileId, manualRedactions);
+        log.info("add surrounding text for manual redaction in dossierId {} and fileId {} took: {}", dossierId, fileId, System
+                .currentTimeMillis() - start);
+        return result;
+    }
+
 }
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java
new file mode 100644
index 00000000..c5b2bb74
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualRedactionSurroundingTextService.java
@@ -0,0 +1,187 @@
+package com.iqser.red.service.redaction.v1.server.redaction.service;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.springframework.stereotype.Service;
+
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactionEntry;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualResizeRedaction;
+import com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle;
+import com.iqser.red.service.redaction.v1.model.Engine;
+import com.iqser.red.service.redaction.v1.model.SectionArea;
+import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
+import com.iqser.red.service.redaction.v1.server.classification.model.Text;
+import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
+import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Fills in the text before/after manually added or resized redactions by locating their
+ * value inside the stored section texts of a file.
+ */
+@Service
+@RequiredArgsConstructor
+public class ManualRedactionSurroundingTextService {
+
+    private final RedactionStorageService redactionStorageService;
+    private final SurroundingWordsService surroundingWordsService;
+
+
+    /**
+     * Sets {@code textBefore}/{@code textAfter} on every add and resize entry whose positions
+     * lie inside a section area of the stored text.
+     * <p>
+     * The given object is modified in place: matched entries are removed from their collection
+     * while sections are scanned and re-added at the end, so their iteration order may change.
+     * Entries that match no section area are left untouched.
+     *
+     * @param dossierId        dossier the file belongs to
+     * @param fileId           file whose stored text is searched
+     * @param manualRedactions manual redactions to enrich
+     * @return the same {@code manualRedactions} instance
+     */
+    public ManualRedactions addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
+
+        Text text = redactionStorageService.getText(dossierId, fileId);
+        List<ManualRedactionEntry> processedAddRedactions = new ArrayList<>();
+        List<ManualResizeRedaction> processedResizeRedactions = new ArrayList<>();
+
+        for (SectionText sectionText : text.getSectionTexts()) {
+
+            // Everything has been matched already, no need to look at further sections.
+            if (manualRedactions.getEntriesToAdd().isEmpty() && manualRedactions.getResizeRedactions().isEmpty()) {
+                break;
+            }
+
+            for (SectionArea sectionArea : sectionText.getSectionAreas()) {
+
+                var addItty = manualRedactions.getEntriesToAdd().iterator();
+                while (addItty.hasNext()) {
+                    var manualAddRedaction = addItty.next();
+                    if (sectionContainsEntry(sectionArea, manualAddRedaction.getPositions())) {
+                        var surroundingText = findSurroundingText(sectionText, manualAddRedaction.getValue(), manualAddRedaction
+                                .getPositions());
+                        manualAddRedaction.setTextBefore(surroundingText.getLeft());
+                        manualAddRedaction.setTextAfter(surroundingText.getRight());
+                        processedAddRedactions.add(manualAddRedaction);
+                        // Removing prevents a second match in another area or section.
+                        addItty.remove();
+                    }
+                }
+
+                var resizeItty = manualRedactions.getResizeRedactions().iterator();
+                while (resizeItty.hasNext()) {
+                    var manualResizeRedaction = resizeItty.next();
+                    if (sectionContainsEntry(sectionArea, manualResizeRedaction.getPositions())) {
+                        var surroundingText = findSurroundingText(sectionText, manualResizeRedaction.getValue(), manualResizeRedaction
+                                .getPositions());
+                        manualResizeRedaction.setTextBefore(surroundingText.getLeft());
+                        manualResizeRedaction.setTextAfter(surroundingText.getRight());
+                        processedResizeRedactions.add(manualResizeRedaction);
+                        resizeItty.remove();
+                    }
+                }
+            }
+        }
+
+        manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
+        manualRedactions.getResizeRedactions().addAll(processedResizeRedactions);
+        return manualRedactions;
+    }
+
+
+    /**
+     * Searches {@code value} in the section text and returns the text before/after the
+     * occurrence that intersects one of {@code toFindPositions}.
+     *
+     * @return pair of (textBefore, textAfter); both {@code null} when {@code value} is
+     * {@code null} or no occurrence intersects the given positions
+     */
+    private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
+                                                     List<Rectangle> toFindPositions) {
+
+        // Set.of rejects null elements, so a redaction without a value cannot be searched.
+        if (value == null) {
+            return Pair.of(null, null);
+        }
+
+        Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText
+                .getSectionNumber(), false, false, Engine.DICTIONARY);
+        Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
+
+        Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
+        if (correctEntity == null) {
+            // No occurrence of the value at the redaction's position: nothing to report,
+            // and Set.of(null) below would throw a NullPointerException.
+            return Pair.of(null, null);
+        }
+
+        if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) {
+            surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText
+                    .getCellStarts());
+        } else {
+            surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null);
+        }
+
+        return Pair.of(correctEntity.getTextBefore(), correctEntity.getTextAfter());
+    }
+
+
+    /** Returns whether the section area fully contains at least one of the positions. */
+    private boolean sectionContainsEntry(SectionArea sectionArea, List<Rectangle> positions) {
+
+        for (Rectangle position : positions) {
+            if (sectionArea.contains(position)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Returns the first entity having a text position sequence that intersects one of the
+     * given rectangles, or {@code null} if there is none.
+     */
+    private Entity getEntityOnCorrectPosition(Set<Entity> entitiesWithPositions, List<Rectangle> toFindPositions) {
+
+        for (Entity entityWithPos : entitiesWithPositions) {
+            for (EntityPositionSequence entityPositionSequence : entityWithPos.getPositionSequences()) {
+                for (TextPositionSequence textPositionSequence : entityPositionSequence.getSequences()) {
+                    for (Rectangle manualRedactionRectangle : toFindPositions) {
+                        if (intersects(manualRedactionRectangle, textPositionSequence.getRectangle())) {
+                            return entityWithPos;
+                        }
+                    }
+                }
+            }
+        }
+
+        return null;
+    }
+
+
+    /**
+     * Returns whether the two rectangles overlap. Strict comparisons are used, so rectangles
+     * that only share an edge do not intersect.
+     * NOTE(review): only coordinates are compared, the page is not checked here -- confirm
+     * callers only pass rectangles of the same page.
+     */
+    public boolean intersects(Rectangle manualPosition,
+                              com.iqser.red.service.redaction.v1.model.Rectangle textPositionRectangle) {
+
+        return textPositionRectangle.getTopLeft()
+                .getX() + textPositionRectangle.getWidth() > manualPosition.getTopLeftX() && textPositionRectangle.getTopLeft()
+                .getY() + textPositionRectangle.getHeight() > manualPosition.getTopLeftY() && textPositionRectangle.getTopLeft()
+                .getX() < manualPosition.getTopLeftX() + manualPosition.getWidth() && textPositionRectangle.getTopLeft()
+                .getY() < manualPosition.getTopLeftY() + manualPosition.getHeight();
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java
index 167b30f2..8828cf0b 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogMergeService.java
@@ -254,12 +254,16 @@ public class RedactionLogMergeService {
 redactionLogEntry.setPositions(convertPositions(manualResizeRedact.getPositions()));
 redactionLogEntry.setValue(manualResizeRedact.getValue());
 redactionLogEntry.setHasBeenResized(true);
+
redactionLogEntry.setTextBefore(manualResizeRedact.getTextBefore()); + redactionLogEntry.setTextAfter(manualResizeRedact.getTextAfter()); manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", resized by manual override"); } else if (manualResizeRedact.getStatus().equals(AnnotationStatus.REQUESTED)) { manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to resize redact"); redactionLogEntry.setStatus(AnnotationStatus.REQUESTED); redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted(), false)); redactionLogEntry.setPositions(convertPositions(manualResizeRedact.getPositions())); + redactionLogEntry.setTextBefore(manualResizeRedact.getTextBefore()); + redactionLogEntry.setTextAfter(manualResizeRedact.getTextAfter()); } else { redactionLogEntry.setStatus(AnnotationStatus.DECLINED); } @@ -300,6 +304,8 @@ public class RedactionLogMergeService { RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, manualRedactionEntry.getAnnotationId(), dossierTemplateId); redactionLogEntry.setPositions(convertPositions(manualRedactionEntry.getPositions())); redactionLogEntry.setComments(comments.get(manualRedactionEntry.getAnnotationId())); + redactionLogEntry.setTextBefore(manualRedactionEntry.getTextBefore()); + redactionLogEntry.setTextAfter(manualRedactionEntry.getTextAfter()); sectionTextService.handleSectionText(sectionGrid, redactionLogEntry); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SurroundingWordsService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SurroundingWordsService.java index a34b53bb..55ef0b1b 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SurroundingWordsService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/SurroundingWordsService.java @@ -28,7 +28,7 @@ public class SurroundingWordsService { try { for (Entity entity : entities) { - if (dictionary.isHint(entity.getType())) { + if (dictionary != null && dictionary.isHint(entity.getType())) { continue; } findSurroundingWords(entity, searchableText.toString(), entity.getStart(), entity.getEnd()); @@ -64,7 +64,7 @@ public class SurroundingWordsService { String text = searchableString.substring(startOffset, endOffset); for (Entity entity : entities) { - if (dictionary.isHint(entity.getType())) { + if (dictionary != null && dictionary.isHint(entity.getType())) { continue; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index 7d101356..5597e8a4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -104,7 +104,7 @@ public class EntitySearchUtils { .collect(Collectors.toList()); Entity firstEntity = orderedEntities.get(0); List positionSequences = text.getSequences(firstEntity.getWord() - .trim(), dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences()); + .trim(), dictionary == null ? 
true : dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences()); for (int i = 0; i <= orderedEntities.size() - 1; i++) { try { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 05742bbf..b4b388bd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -82,6 +82,7 @@ import com.iqser.red.service.redaction.v1.server.client.RulesClient; import com.iqser.red.service.redaction.v1.server.controller.RedactionController; import com.iqser.red.service.redaction.v1.server.memory.MemoryStats; import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService; +import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService; import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -145,6 +146,9 @@ public class RedactionIntegrationTest { @Autowired private StorageService storageService; + @Autowired + private ManualRedactionSurroundingTextService manualRedactionSurroundingTextService; + @MockBean private AmazonS3 amazonS3; @@ -1329,6 +1333,86 @@ public class RedactionIntegrationTest { } + @Test + public void testManualSurroundingText() throws IOException { + + ClassPathResource pdfFileResource = new ClassPathResource("files/new/S4.pdf"); + + ManualRedactions manualRedactions = new ManualRedactions(); + + 
ManualRedactionEntry manualRedactionEntry = new ManualRedactionEntry(); + manualRedactionEntry.setAnnotationId(UUID.randomUUID().toString()); + manualRedactionEntry.setFileId("fileId"); + manualRedactionEntry.setStatus(AnnotationStatus.APPROVED); + manualRedactionEntry.setType("CBI_author"); + manualRedactionEntry.setValue("rabbits"); + manualRedactionEntry.setReason("Manual Redaction"); + manualRedactionEntry.setPositions(List.of(Rectangle.builder() + .topLeftX(70.944f) + .topLeftY(670.1595f) + .width(30.07296f) + .height(10.048125f) + .page(1) + .build())); + + ManualRedactionEntry manualRedactionEntry2 = new ManualRedactionEntry(); + manualRedactionEntry2.setAnnotationId(UUID.randomUUID().toString()); + manualRedactionEntry2.setFileId("fileId"); + manualRedactionEntry2.setStatus(AnnotationStatus.APPROVED); + manualRedactionEntry2.setType("CBI_author"); + manualRedactionEntry2.setValue("rabbits"); + manualRedactionEntry2.setReason("Manual Redaction"); + manualRedactionEntry2.setPositions(List.of(Rectangle.builder() + .topLeftX(470.5204f) + .topLeftY(746.1195f) + .width(29.96256f) + .height(10.048125f) + .page(1) + .build())); + + ManualRedactionEntry manualRedactionEntry3 = new ManualRedactionEntry(); + manualRedactionEntry3.setAnnotationId(UUID.randomUUID().toString()); + manualRedactionEntry3.setFileId("fileId"); + manualRedactionEntry3.setStatus(AnnotationStatus.APPROVED); + manualRedactionEntry3.setType("CBI_author"); + manualRedactionEntry3.setValue("AOEL"); + manualRedactionEntry3.setReason("Manual Redaction"); + manualRedactionEntry3.setPositions(List.of(Rectangle.builder() + .topLeftX(355.53775f) + .topLeftY(266.1895f) + .width(29.32224f) + .height(10.048125f) + .page(1) + .build())); + + manualRedactions.getEntriesToAdd().add(manualRedactionEntry); + manualRedactions.getEntriesToAdd().add(manualRedactionEntry2); + manualRedactions.getEntriesToAdd().add(manualRedactionEntry3); + + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); 
+ request.setManualRedactions(manualRedactions); + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + AnalyzeResult result = analyzeService.analyze(request); + + AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() + .dossierId(TEST_DOSSIER_ID) + .dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID) + .fileId(TEST_FILE_ID) + .manualRedactions(manualRedactions) + .build()); + + try (FileOutputStream fileOutputStream = new FileOutputStream(getTemporaryDirectory() + "/Annotated.pdf")) { + fileOutputStream.write(annotateResponse.getDocument()); + } + + var surroundingTextResult = manualRedactionSurroundingTextService.addSurroundingText(TEST_DOSSIER_ID, TEST_FILE_ID, manualRedactions); + surroundingTextResult.getEntriesToAdd().forEach(addEntry -> { + assertThat(addEntry.getTextAfter()).isNotEmpty(); + }); + + } + + private static String loadFromClassPath(String path) { URL resource = ResourceLoader.class.getClassLoader().getResource(path); @@ -1375,4 +1459,4 @@ public class RedactionIntegrationTest { return "/tmp"; } -} +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S4.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S4.pdf new file mode 100644 index 00000000..f28ddb63 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/S4.pdf differ