diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
index 8896590a..9a8ea418 100644
--- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
+++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
@@ -61,7 +61,7 @@ dependencies {
     implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
     implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
 
-    implementation("org.ahocorasick:ahocorasick:0.6.3")
+    implementation("org.ahocorasick:ahocorasick:0.7.3") // bumped alongside the new builder.ignoreWhiteSpace() call in SearchImplementation
     implementation("org.javassist:javassist:3.29.2-GA")
 
     implementation("org.drools:drools-engine:${droolsVersion}")
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java
index 0fd97aed..e68f7350 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java
@@ -19,6 +19,7 @@ import lombok.Data;
 public class SearchImplementation {
 
     private boolean ignoreCase;
+    private boolean ignoreWhiteSpace;
     private List values;
     private Pattern pattern;
 
@@ -41,13 +42,22 @@ public class SearchImplementation {
     }
 
 
+    public SearchImplementation(Collection values, boolean ignoreCase, boolean ignoreWhiteSpace) {
+
+        this.values = new ArrayList<>(values);
+        this.ignoreCase = ignoreCase;
+        this.ignoreWhiteSpace = ignoreWhiteSpace;
+        this.createSearchImplementation();
+    }
+
+
     private void createSearchImplementation() {
 
         if (this.values.isEmpty()) {
             return;
         }
 
-        if (this.values.size() == 1) {
+        if (this.values.size() == 1 && !this.ignoreWhiteSpace) { // the single-value shortcut is skipped when whitespace is ignored
             var text = this.values.iterator().next();
 
             if (this.ignoreCase) {
@@ -60,6 +70,9 @@ public class SearchImplementation {
         if (this.ignoreCase) {
             builder.ignoreCase();
         }
+        if (this.ignoreWhiteSpace) {
+            builder.ignoreWhiteSpace();
+        }
         builder.addKeywords(this.values);
 
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
index 8049d0b0..eef213f2 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
@@ -264,7 +264,7 @@ public class AtomicTextBlock implements TextBlock {
     }
 
 
-    private List getAllLineBreaksInBoundary(TextRange textRange) {
+    protected List getAllLineBreaksInBoundary(TextRange textRange) { // widened so PartialTextBlock (same package) can call it
 
         return getLineBreaks().stream()
                 .map(linebreak -> linebreak + this.textRange.start())
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/PartialTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/PartialTextBlock.java
new file mode 100644
index 00000000..7061c96b
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/PartialTextBlock.java
@@ -0,0 +1,141 @@
+package com.iqser.red.service.redaction.v1.server.model.document.textblock;
+
+import java.awt.geom.Rectangle2D;
+import java.text.BreakIterator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
+import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
+
+import lombok.AccessLevel;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.experimental.FieldDefaults;
+
+/**
+ * View on a sub-range of a single {@link AtomicTextBlock}.
+ * <p>
+ * Search text, words and line breaks are computed from {@code textRange}; the remaining position and line queries
+ * are forwarded unchanged to the backing block.
+ * <p>
+ * NOTE(review): the search-text getters apply {@code textRange} directly to the backing block's text, whereas the
+ * backing block shifts its line breaks by its own range start - confirm both use the same offsets.
+ */
+@Data
+@AllArgsConstructor
+@FieldDefaults(level = AccessLevel.PRIVATE)
+public class PartialTextBlock implements TextBlock {
+
+    AtomicTextBlock atomicTextBlock;
+    TextRange textRange;
+
+
+    @Override
+    public String getSearchText() {
+
+        return atomicTextBlock.getSearchText().substring(textRange.start(), textRange.end());
+    }
+
+
+    @Override
+    public String getSearchTextLowerCase() {
+
+        return atomicTextBlock.getSearchTextLowerCase().substring(textRange.start(), textRange.end());
+    }
+
+
+    /**
+     * Splits the partial search text at the boundaries reported by a word {@link BreakIterator}.
+     *
+     * @return every segment between two consecutive boundaries, in text order
+     */
+    @Override
+    public List<String> getWords() {
+
+        // Take the substring once and reuse it for both the iterator and the segment extraction.
+        String searchText = getSearchText();
+
+        List<String> words = new ArrayList<>();
+        BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
+        iterator.setText(searchText);
+        int start = iterator.first();
+        for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
+            words.add(searchText.substring(start, end));
+        }
+        return words;
+    }
+
+
+    @Override
+    public List<AtomicTextBlock> getAtomicTextBlocks() {
+
+        return List.of(atomicTextBlock);
+    }
+
+
+    @Override
+    public int getNextLinebreak(int fromIndex) {
+
+        return atomicTextBlock.getNextLinebreak(fromIndex);
+    }
+
+
+    @Override
+    public int getPreviousLinebreak(int fromIndex) {
+
+        return atomicTextBlock.getPreviousLinebreak(fromIndex);
+    }
+
+
+    @Override
+    public TextRange getLineTextRange(int lineNumber) {
+
+        return atomicTextBlock.getLineTextRange(lineNumber);
+    }
+
+
+    @Override
+    public List<Integer> getLineBreaks() {
+
+        return atomicTextBlock.getAllLineBreaksInBoundary(textRange);
+    }
+
+
+    @Override
+    public Rectangle2D getPosition(int stringIdx) {
+
+        return atomicTextBlock.getPosition(stringIdx);
+    }
+
+
+    @Override
+    public List<Rectangle2D> getPositions(TextRange stringTextRange) {
+
+        return atomicTextBlock.getPositions(stringTextRange);
+    }
+
+
+    @Override
+    public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
+
+        return atomicTextBlock.getPositionsPerPage(stringTextRange);
+    }
+
+
+    @Override
+    public String subSequenceWithLineBreaks(TextRange textRange) {
+
+        return atomicTextBlock.subSequenceWithLineBreaks(textRange);
+    }
+
+
+    @Override
+    public int numberOfLines() {
+
+        return getLineBreaks().size() + 1;
+    }
+
+}
diff --git
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/NotFoundImportedEntitiesService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/NotFoundImportedEntitiesService.java
index 2ef3505b..0e2edfe1 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/NotFoundImportedEntitiesService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/NotFoundImportedEntitiesService.java
@@ -19,6 +19,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ChangeFactory;
+import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
 import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
 import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
 
@@ -33,6 +34,7 @@ import lombok.extern.slf4j.Slf4j;
 public class NotFoundImportedEntitiesService {
 
     public static final String IMPORTED_REDACTION_TYPE = "imported_redaction";
+    private final RedactionServiceSettings settings; // read below through isAnnotationMode()
 
 
     @Timed("redactmanager_processEntityLog")
@@ -95,7 +97,7 @@ public class NotFoundImportedEntitiesService {
                 }
                 entityLogEntry.getImportedRedactionIntersections().add(precursorEntity.getId());
 
-                if (entityLogEntry.getState() != EntryState.REMOVED) {
+                if (entityLogEntry.getState() != EntryState.REMOVED && !settings.isAnnotationMode()) { // the entry state stays untouched while annotation mode is on
                     entityLogEntry.setState(EntryState.REMOVED);
                     entityLogEntry.getChanges().add(ChangeFactory.toChange(ChangeType.REMOVED, OffsetDateTime.now(), analysisNumber));
                 }
diff
--git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java
index 30d34278..ed3ffa2a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java
@@ -348,16 +348,10 @@ public class ComponentCreationService {
      */
     public void create(String ruleIdentifier, String name, String value, String valueDescription, Entity reference) {
 
-        referencedEntities.add(reference);
         List referenceList = new LinkedList<>();
         referenceList.add(reference);
-        kieSession.insert(Component.builder()
-                .matchedRule(RuleIdentifier.fromString(ruleIdentifier))
-                .name(name)
-                .value(value)
-                .valueDescription(valueDescription)
-                .references(referenceList)
-                .build());
+        // Delegate with the mutable, null-tolerant list used so far; List.of(reference) would reject a null reference.
+        create(ruleIdentifier, name, value, valueDescription, referenceList);
     }
 
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java
index 3fc6cf33..086582e4 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java
@@ -4,6 +4,7 @@ import static java.lang.String.format;
 import static java.util.stream.Collectors.groupingBy;
 
 import java.awt.geom.Rectangle2D;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -14,6 +15,7 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import org.apache.commons.text.similarity.LevenshteinDistance;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
@@ -22,14 +24,18 @@ import com.iqser.red.service.redaction.v1.server.model.ClosestEntity;
 import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
 import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
 import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
+import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
 import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
 import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
 import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
 import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
 import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
 import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
+import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
+import com.iqser.red.service.redaction.v1.server.model.document.textblock.PartialTextBlock;
 import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
 import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
+import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities;
 
 import lombok.extern.slf4j.Slf4j;
 
@@ -37,6 +43,7 @@ import lombok.extern.slf4j.Slf4j;
 @Service
 public class EntityFindingUtility {
 
+    private static final float STRING_SIMILARITY_THRESHOLD = 0.2f; // share of the shorter text length used as edit-distance budget
     EntityCreationService entityCreationService;
 
 
@@ -55,7 +62,7 @@ public class EntityFindingUtility {
             return Optional.empty();
         }
 
-        List possibleEntities = entitiesWithSameValue.get(precursorEntity.getValue().toLowerCase(Locale.ENGLISH));
+        List possibleEntities = entitiesWithSameValue.get(TextNormalizationUtilities.removeAllWhitespaces(precursorEntity.getValue().toLowerCase(Locale.ENGLISH)));
 
         if (entityIdentifierValueNotFound(possibleEntities)) {
             log.info("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue());
@@ -91,6 +98,100 @@ public class EntityFindingUtility {
     }
 
 
+    /**
+     * Tries to locate a precursor entity through the text underneath its rectangles.
+     * <p>
+     * Only the page of the first position and the first text block on it that intersects the bounding box of all
+     * positions are inspected. The first character run whose Levenshtein distance to the precursor value stays
+     * within the threshold is handed to {@code entityCreationService.byTextRangeWithEngine}.
+     *
+     * @param precursorEntity entity providing the positions and the expected value
+     * @param document        document whose pages are searched
+     * @return the result of the entity creation, or empty if no position, page, text block or similar run exists
+     */
+    public Optional<TextEntity> findEntityByUnderlyingText(PrecursorEntity precursorEntity, Document document) {
+
+        if (precursorEntity.getEntityPosition().isEmpty()) {
+            return Optional.empty();
+        }
+
+        Optional<Page> optionalPage = document.getPages()
+                .stream()
+                .filter(docPage -> docPage.getNumber()
+                        .equals(precursorEntity.getEntityPosition()
+                                        .get(0).pageNumber()))
+                .findFirst();
+
+        if (optionalPage.isEmpty()) {
+            return Optional.empty();
+        }
+
+        Page page = optionalPage.get();
+        Rectangle2D rect = precursorEntity.getEntityPosition()
+                .stream()
+                .map(RectangleWithPage::rectangle2D)
+                .collect(RectangleTransformations.collectBBox());
+        Optional<AtomicTextBlock> intersectingTbOptional = page.getTextBlocksOnPage()
+                .stream()
+                .filter(tb -> RectangleTransformations.rectangle2DBBox(tb.getPositions()).intersects(rect))
+                .findFirst();
+        if (intersectingTbOptional.isEmpty()) {
+            return Optional.empty();
+        }
+        AtomicTextBlock intersectingTb = intersectingTbOptional.get();
+        List<PartialTextBlock> underlyingTextRuns = findUnderlyingCharacterRuns(intersectingTb, rect);
+
+        for (PartialTextBlock underlyingText : underlyingTextRuns) {
+            int threshold = (int) (Math.min(underlyingText.length(), precursorEntity.length()) * STRING_SIMILARITY_THRESHOLD) + 1;
+            int distance = new LevenshteinDistance(threshold).apply(underlyingText.getSearchText(), precursorEntity.getValue());
+            if (distance >= 0) { // apply() yields -1 once the distance exceeds the threshold
+                // NOTE(review): the run's range is relative to the text block - confirm byTextRangeWithEngine does not expect document offsets.
+                return entityCreationService.byTextRangeWithEngine(underlyingText.getTextRange(), "temp", EntityType.ENTITY, document, Collections.emptySet());
+            }
+        }
+        return Optional.empty();
+    }
+
+
+    /**
+     * Collects the maximal runs of consecutive characters whose position rectangles intersect {@code rect}.
+     *
+     * @param intersectingTb text block whose characters are tested
+     * @param rect           rectangle to test against
+     * @return one partial text block per run, in text order; ranges are zero-based within the block, end exclusive
+     */
+    private static List<PartialTextBlock> findUnderlyingCharacterRuns(AtomicTextBlock intersectingTb, Rectangle2D rect) {
+
+        List<PartialTextBlock> intersectingTextBlocks = new ArrayList<>();
+        int first = -1;
+        int last = -1;
+        int blockStart = intersectingTb.getTextRange().start();
+        int characterCount = intersectingTb.getPositions().size();
+
+        for (int i = 0; i < characterCount; i++) {
+            Rectangle2D rectangle2D = intersectingTb.getPosition(i + blockStart);
+
+            if (rectangle2D.intersects(rect)) {
+                if (first == -1) {
+                    first = i;
+                }
+                last = i;
+            } else if (first != -1) {
+                intersectingTextBlocks.add(new PartialTextBlock(intersectingTb, new TextRange(first, last + 1))); // last is inclusive, the range end is not
+
+                first = -1;
+                last = -1;
+            }
+        }
+
+        if (first != -1) {
+            intersectingTextBlocks.add(new PartialTextBlock(intersectingTb, new TextRange(first, last + 1))); // run reaching the end of the block
+        }
+
+        return intersectingTextBlocks;
+    }
+
+
     private static boolean entityIdentifierValueNotFound(List possibleEntities) {
 
         return possibleEntities == null || possibleEntities.isEmpty();
@@ -183,7 +284,7 @@ public class EntityFindingUtility {
 
         SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream()
                 .map(String::trim)
-                .collect(Collectors.toSet()), true);
+                .collect(Collectors.toSet()), true, true);
 
         List textBlocks = node.getTextBlocksByPageNumbers(pageNumbers);
 
@@ -193,7 +294,7 @@ public class EntityFindingUtility {
                 .filter(Optional::isPresent)
                 .map(Optional::get)
                 .distinct()
-                .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
+                .collect(groupingBy(entity -> TextNormalizationUtilities.removeAllWhitespaces(entity.getValue().toLowerCase(Locale.ROOT))));
     }
 
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/TextNormalizationUtilities.java
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/TextNormalizationUtilities.java
index fcaa358c..3bd17f9a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/TextNormalizationUtilities.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/TextNormalizationUtilities.java
@@ -1,31 +1,47 @@
 package com.iqser.red.service.redaction.v1.server.utils;
 
+import java.util.regex.Pattern;
+
 import lombok.experimental.UtilityClass;
 
 @UtilityClass
 public final class TextNormalizationUtilities {
 
-    /**
-     * Revert hyphenation due to line breaks.
-     *
-     * @param text Text to be processed.
-     * @return Text without line-break hyphenation.
-     */
-    public static String removeHyphenLineBreaks(String text) {
+    public static final Pattern hyphenLineBreaks = Pattern.compile("[-~‐‒⁻−﹣゠⁓‑\\u00AD][\\r\\n]+"); // one hyphen-like character followed by CR/LF characters
+    public static final Pattern linebreaks = Pattern.compile("[\\r\\n]+"); // one or more CR/LF characters
+    public static final Pattern doubleWhitespaces = Pattern.compile("\\s{2,}"); // two or more whitespace characters
+    public static final Pattern WHITESPACE_REMOVAL = Pattern.compile("\\s+"); // any run of whitespace
 
-        return text.replaceAll("([^\\s\\d\\-]{2,500})[\\-\\u00AD]\\R", "$1");
+
+    public String cleanString(String value) { // removes hyphenated line breaks, turns remaining line breaks into spaces, collapses whitespace runs
+
+        String noHyphenLinebreaks = removeHyphenLinebreaks(value);
+        String noLinebreaks = removeLinebreaks(noHyphenLinebreaks);
+        return removeMultipleWhitespaces(noLinebreaks);
     }
 
 
-    public static String removeLineBreaks(String text) {
+    public String removeHyphenLinebreaks(String value) { // deletes each hyphen-like character together with the line breaks following it
 
-        return text.replaceAll("\n", " ");
+        return hyphenLineBreaks.matcher(value).replaceAll("");
     }
 
 
-    public static String removeRepeatingWhitespaces(String text) {
+    private String removeMultipleWhitespaces(String value) { // replaces every run of two or more whitespace characters with one space
 
-        return text.replaceAll(" {2}", " ");
+        return doubleWhitespaces.matcher(value).replaceAll(" ");
+    }
+
+
+    private String removeLinebreaks(String value) { // replaces every run of CR/LF characters with one space
+
+        return linebreaks.matcher(value).replaceAll(" ");
+    }
+
+
+    public String removeAllWhitespaces(String value) { // deletes all whitespace from the value
+
+        return WHITESPACE_REMOVAL.matcher(value).replaceAll("");
     }
 
 }
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java
index 9900f7ee..234a34ac 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java
@@ -611,7 +611,7 @@ public abstract class AbstractRedactionIntegrationTest {
 
     private String cleanDictionaryEntry(String entry) {
 
-        return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
+        return TextNormalizationUtilities.cleanString(entry); // cleanString additionally collapses whitespace runs
     }
 
 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index cde44f7a..f331d76b 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -6,8 +6,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.anyString;
-import static org.mockito.Mockito.times;
-import static
org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 import java.io.BufferedWriter;
@@ -37,7 +35,6 @@ import org.junit.jupiter.api.Order;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestMethodOrder;
 import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.Mock;
 import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
 import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
 import org.springframework.boot.test.context.SpringBootTest;
@@ -83,7 +80,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
 import com.iqser.red.service.redaction.v1.server.rules.RulesIntegrationTest;
 import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
-import com.iqser.red.service.redaction.v1.server.storage.DocumentDataFallbackService;
 import com.iqser.red.storage.commons.StorageAutoConfiguration;
 import com.iqser.red.storage.commons.service.StorageService;
 import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
@@ -102,7 +98,6 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
 
     private static final String RULES = loadFromClassPath("drools/rules.drl");
 
-
     @Configuration
     @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
     @Import({LayoutParsingServiceProcessorConfiguration.class})
@@ -1233,6 +1228,44 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
     }
 
 
+    @Test
+    @Disabled // NOTE(review): disabled without a stated reason - confirm whether this test is meant to run
+    public void testImportedRedactions2() throws IOException {
+
+        String outputFileName = OsUtils.getTemporaryDirectory() + "/ImportedRedactions.pdf";
+        ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/76c5683ebc8c19dc23eccea12dfc652b.IMPORTED_REDACTIONS.json");
+
+        AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/76c5683ebc8c19dc23eccea12dfc652b.ORIGIN.pdf");
+        storageService.storeObject(TenantContext.getTenantId(),
+                StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
+                importedRedactions.getInputStream());
+
+        analyzeDocumentStructure(LayoutParsingType.DOCUMINE_OLD, request);
+        AnalyzeResult result = analyzeService.analyze(request); // result is not read afterwards; the entity log is loaded from storage instead
+
+        var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+
+        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
+
+        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+
+        entityLog.getEntityLogEntry()
+                .forEach(entry -> { // only entries with the two exact values below are asserted; all others pass unchecked
+                    if (entry.getValue() == null) {
+                        return;
+                    }
+                    if (entry.getValue().equals("David")) {
+                        assertThat(entry.getImportedRedactionIntersections()).hasSize(1);
+                    }
+                    if (entry.getValue().equals("annotation")) {
+                        assertThat(entry.getImportedRedactionIntersections()).isEmpty();
+                    }
+                });
+    }
+
+
     @Test
     public void testExpandByPrefixRegEx() throws IOException {
 
@@ -2252,9 +2285,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
     @Test
     public void testFileWithImagesAndNoText() {
 
-        AnalyzeRequest request = prepareStorage("files/new/only_images.pdf",
-                "files/cv_service_empty_response.json",
-                "files/only_images_file_image_response.json");
+        AnalyzeRequest request = prepareStorage("files/new/only_images.pdf", "files/cv_service_empty_response.json", "files/only_images_file_image_response.json");
 
         analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
         AnalyzeResult result = analyzeService.analyze(request);
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilitiesTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilitiesTest.java
index a17350bb..bff72087 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilitiesTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilitiesTest.java
@@ -11,10 +11,10 @@ public class TextNormalizationUtilitiesTest {
     public void testHyphenRemoval() {
 
         String test = "Without these peo-\nple, this conference would not happen";
-        Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test)).contains("people");
+        Assertions.assertThat(TextNormalizationUtilities.removeHyphenLinebreaks(test)).contains("people"); // "peo-\nple" must be joined
 
         test = "Die\t\nFreiwillige\t Versicherung\t endet\t zudem\t für\t den\t ein\u00AD\nzelnen\tVersicherten\tmit\tder\tAufhebung\tdes\tVertra-\nges,\t seiner\t Unterstellung\t unter\t die\t obligatorische\t\nVersicherung\t oder\t seinem\t Ausschluss.";
-        Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test)).contains("einzelnen", "Vertrages");
+        Assertions.assertThat(TextNormalizationUtilities.removeHyphenLinebreaks(test)).contains("einzelnen", "Vertrages"); // covers the soft hyphen (U+00AD) and the plain hyphen
     }
 
 