Compare commits
2 Commits
master
...
extended-e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1db515acf0 | ||
|
|
f9e7703063 |
@ -61,7 +61,7 @@ dependencies {
|
||||
|
||||
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
|
||||
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
|
||||
implementation("org.ahocorasick:ahocorasick:0.6.3")
|
||||
implementation("org.ahocorasick:ahocorasick:0.7.3")
|
||||
implementation("org.javassist:javassist:3.29.2-GA")
|
||||
|
||||
implementation("org.drools:drools-engine:${droolsVersion}")
|
||||
@ -129,6 +129,7 @@ tasks.named<BootBuildImage>("bootBuildImage") {
|
||||
"BPE_APPEND_JAVA_TOOL_OPTIONS",
|
||||
"-XX:MaxMetaspaceSize=1g -Dfile.encoding=UTF-8 -Dkie.repository.project.cache.size=50 -Dkie.repository.project.versions.cache.size=5"
|
||||
)
|
||||
environment.put("BPE_DEFAULT_LANG", "en_US.utf8")
|
||||
environment.put("BPE_DEFAULT_LANG", "en_US.utf8") // java.text.Normalizer does not care for file.encoding
|
||||
|
||||
imageName.set("nexus.knecon.com:5001/red/${project.name}")// must build image with same name always, otherwise the builder will not know which image to use as cache. DO NOT CHANGE!
|
||||
|
||||
@ -19,6 +19,7 @@ import lombok.Data;
|
||||
public class SearchImplementation {
|
||||
|
||||
private boolean ignoreCase;
|
||||
private boolean ignoreWhiteSpace;
|
||||
private List<String> values;
|
||||
|
||||
private Pattern pattern;
|
||||
@ -41,13 +42,22 @@ public class SearchImplementation {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds a search implementation for the given values.
 *
 * @param values           values to search for; they are copied into a new list
 * @param ignoreCase       value stored in the {@code ignoreCase} field
 * @param ignoreWhiteSpace value stored in the {@code ignoreWhiteSpace} field
 */
public SearchImplementation(Collection<String> values, boolean ignoreCase, boolean ignoreWhiteSpace) {

    // Flags first, then the defensive copy; all three must be set before the search structure is built.
    this.ignoreWhiteSpace = ignoreWhiteSpace;
    this.ignoreCase = ignoreCase;
    this.values = new ArrayList<>(values);

    createSearchImplementation();
}
|
||||
|
||||
|
||||
private void createSearchImplementation() {
|
||||
|
||||
if (this.values.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.values.size() == 1) {
|
||||
if (this.values.size() == 1 && !this.ignoreWhiteSpace) {
|
||||
var text = this.values.iterator().next();
|
||||
|
||||
if (this.ignoreCase) {
|
||||
@ -60,6 +70,9 @@ public class SearchImplementation {
|
||||
if (this.ignoreCase) {
|
||||
builder.ignoreCase();
|
||||
}
|
||||
if (this.ignoreWhiteSpace) {
|
||||
builder.ignoreWhiteSpace();
|
||||
}
|
||||
|
||||
builder.addKeywords(this.values);
|
||||
|
||||
|
||||
@ -264,7 +264,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
private List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
|
||||
protected List<Integer> getAllLineBreaksInBoundary(TextRange textRange) {
|
||||
|
||||
return getLineBreaks().stream()
|
||||
.map(linebreak -> linebreak + this.textRange.start())
|
||||
|
||||
@ -0,0 +1,126 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.textblock;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class PartialTextBlock implements TextBlock {
|
||||
|
||||
AtomicTextBlock atomicTextBlock;
|
||||
TextRange textRange;
|
||||
|
||||
|
||||
@Override
|
||||
public String getSearchText() {
|
||||
|
||||
return atomicTextBlock.getSearchText().substring(textRange.start(), textRange.end());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getSearchTextLowerCase() {
|
||||
|
||||
return atomicTextBlock.getSearchTextLowerCase().substring(textRange.start(), textRange.end());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getWords() {
|
||||
|
||||
String searchText = getSearchText();
|
||||
|
||||
List<String> words = new ArrayList<>();
|
||||
BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
|
||||
iterator.setText(getSearchText());
|
||||
int start = iterator.first();
|
||||
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
|
||||
words.add(searchText.substring(start, end));
|
||||
}
|
||||
return words;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<AtomicTextBlock> getAtomicTextBlocks() {
|
||||
|
||||
return List.of(atomicTextBlock);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getNextLinebreak(int fromIndex) {
|
||||
|
||||
return atomicTextBlock.getNextLinebreak(fromIndex);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getPreviousLinebreak(int fromIndex) {
|
||||
|
||||
return atomicTextBlock.getPreviousLinebreak(fromIndex);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextRange getLineTextRange(int lineNumber) {
|
||||
|
||||
return atomicTextBlock.getLineTextRange(lineNumber);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Integer> getLineBreaks() {
|
||||
|
||||
return atomicTextBlock.getAllLineBreaksInBoundary(textRange);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Rectangle2D getPosition(int stringIdx) {
|
||||
|
||||
return atomicTextBlock.getPosition(stringIdx);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Rectangle2D> getPositions(TextRange stringTextRange) {
|
||||
|
||||
return atomicTextBlock.getPositions(stringTextRange);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Map<Page, List<Rectangle2D>> getPositionsPerPage(TextRange stringTextRange) {
|
||||
|
||||
return atomicTextBlock.getPositionsPerPage(stringTextRange);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String subSequenceWithLineBreaks(TextRange textRange) {
|
||||
|
||||
return atomicTextBlock.subSequenceWithLineBreaks(textRange);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int numberOfLines() {
|
||||
|
||||
return getLineBreaks().size() + 1;
|
||||
}
|
||||
|
||||
}
|
||||
@ -19,6 +19,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ChangeFactory;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
|
||||
|
||||
@ -33,6 +34,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class NotFoundImportedEntitiesService {
|
||||
|
||||
public static final String IMPORTED_REDACTION_TYPE = "imported_redaction";
|
||||
private final RedactionServiceSettings settings;
|
||||
|
||||
|
||||
@Timed("redactmanager_processEntityLog")
|
||||
@ -95,7 +97,7 @@ public class NotFoundImportedEntitiesService {
|
||||
}
|
||||
entityLogEntry.getImportedRedactionIntersections().add(precursorEntity.getId());
|
||||
|
||||
if (entityLogEntry.getState() != EntryState.REMOVED) {
|
||||
if (entityLogEntry.getState() != EntryState.REMOVED && !settings.isAnnotationMode()) {
|
||||
entityLogEntry.setState(EntryState.REMOVED);
|
||||
entityLogEntry.getChanges().add(ChangeFactory.toChange(ChangeType.REMOVED, OffsetDateTime.now(), analysisNumber));
|
||||
}
|
||||
|
||||
@ -348,16 +348,7 @@ public class ComponentCreationService {
|
||||
*/
|
||||
public void create(String ruleIdentifier, String name, String value, String valueDescription, Entity reference) {
|
||||
|
||||
referencedEntities.add(reference);
|
||||
List<Entity> referenceList = new LinkedList<>();
|
||||
referenceList.add(reference);
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(referenceList)
|
||||
.build());
|
||||
create(ruleIdentifier, name, value, valueDescription, List.of(reference));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@ import static java.lang.String.format;
|
||||
import static java.util.stream.Collectors.groupingBy;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
@ -14,6 +15,7 @@ import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.text.similarity.LevenshteinDistance;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -22,14 +24,18 @@ import com.iqser.red.service.redaction.v1.server.model.ClosestEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.PartialTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ -37,6 +43,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Service
|
||||
public class EntityFindingUtility {
|
||||
|
||||
private static final float STRING_SIMILARITY_THRESHOLD = 0.2f;
|
||||
EntityCreationService entityCreationService;
|
||||
|
||||
|
||||
@ -55,7 +62,7 @@ public class EntityFindingUtility {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
List<TextEntity> possibleEntities = entitiesWithSameValue.get(precursorEntity.getValue().toLowerCase(Locale.ENGLISH));
|
||||
List<TextEntity> possibleEntities = entitiesWithSameValue.get(TextNormalizationUtilities.removeAllWhitespaces(precursorEntity.getValue().toLowerCase(Locale.ENGLISH)));
|
||||
|
||||
if (entityIdentifierValueNotFound(possibleEntities)) {
|
||||
log.info("Entity could not be created with precursorEntity: {}, due to the value {} not being found anywhere.", precursorEntity, precursorEntity.getValue());
|
||||
@ -91,6 +98,79 @@ public class EntityFindingUtility {
|
||||
}
|
||||
|
||||
|
||||
public Optional<TextEntity> findEntityByUnderlyingText(PrecursorEntity precursorEntity, Document document) {
|
||||
|
||||
if (precursorEntity.getEntityPosition().isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Optional<Page> optionalPage = document.getPages()
|
||||
.stream()
|
||||
.filter(docPage -> docPage.getNumber()
|
||||
.equals(precursorEntity.getEntityPosition()
|
||||
.get(0).pageNumber()))
|
||||
.findFirst();
|
||||
|
||||
if (optionalPage.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Page page = optionalPage.get();
|
||||
Rectangle2D rect = precursorEntity.getEntityPosition()
|
||||
.stream()
|
||||
.map(RectangleWithPage::rectangle2D)
|
||||
.collect(RectangleTransformations.collectBBox());
|
||||
Optional<AtomicTextBlock> intersectingTbOptional = page.getTextBlocksOnPage()
|
||||
.stream()
|
||||
.filter(tb -> RectangleTransformations.rectangle2DBBox(tb.getPositions()).intersects(rect))
|
||||
.findFirst();
|
||||
if (intersectingTbOptional.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
AtomicTextBlock intersectingTb = intersectingTbOptional.get();
|
||||
List<PartialTextBlock> underlyingTextRuns = findUnderlyingCharacterRuns(intersectingTb, rect);
|
||||
|
||||
for (PartialTextBlock underlyingText : underlyingTextRuns) {
|
||||
int threshold = (int) (Math.min(underlyingText.length(), precursorEntity.length()) * STRING_SIMILARITY_THRESHOLD) + 1;
|
||||
int distance = new LevenshteinDistance(threshold).apply(underlyingText.getSearchText(), precursorEntity.getValue());
|
||||
if (distance >= 0) {
|
||||
return entityCreationService.byTextRangeWithEngine(underlyingText.getTextRange(), "temp", EntityType.ENTITY, document, Collections.emptySet());
|
||||
}
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
|
||||
private static List<PartialTextBlock> findUnderlyingCharacterRuns(AtomicTextBlock intersectingTb, Rectangle2D rect) {
|
||||
|
||||
List<PartialTextBlock> intersectingTextBlocks = new ArrayList<>();
|
||||
int first = -1;
|
||||
int last = -1;
|
||||
|
||||
for (int i = 0; i < intersectingTb.getPositions().size(); i++) {
|
||||
Rectangle2D rectangle2D = intersectingTb.getPosition(i + intersectingTb.getTextRange().start());
|
||||
|
||||
if (rectangle2D.intersects(rect)) {
|
||||
if (first == -1) {
|
||||
first = i;
|
||||
}
|
||||
last = i;
|
||||
} else if (first != -1) {
|
||||
intersectingTextBlocks.add(new PartialTextBlock(intersectingTb, new TextRange(first, last)));
|
||||
|
||||
first = -1;
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (first != -1) {
|
||||
intersectingTextBlocks.add(new PartialTextBlock(intersectingTb, new TextRange(first, last)));
|
||||
}
|
||||
|
||||
return intersectingTextBlocks;
|
||||
}
|
||||
|
||||
|
||||
private static boolean entityIdentifierValueNotFound(List<TextEntity> possibleEntities) {
|
||||
|
||||
return possibleEntities == null || possibleEntities.isEmpty();
|
||||
@ -183,7 +263,7 @@ public class EntityFindingUtility {
|
||||
|
||||
SearchImplementation searchImplementation = new SearchImplementation(entryValues.stream()
|
||||
.map(String::trim)
|
||||
.collect(Collectors.toSet()), true);
|
||||
.collect(Collectors.toSet()), true, true);
|
||||
|
||||
List<TextBlock> textBlocks = node.getTextBlocksByPageNumbers(pageNumbers);
|
||||
|
||||
@ -193,7 +273,7 @@ public class EntityFindingUtility {
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.distinct()
|
||||
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
|
||||
.collect(groupingBy(entity -> TextNormalizationUtilities.removeAllWhitespaces(entity.getValue().toLowerCase(Locale.ROOT))));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class EntityFromPrecursorCreationService {
|
||||
|
||||
static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
|
||||
static double MATCH_THRESHOLD = 100; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities
|
||||
EntityFindingUtility entityFindingUtility;
|
||||
DictionaryService dictionaryService;
|
||||
RedactionServiceSettings settings;
|
||||
@ -130,10 +130,8 @@ public class EntityFromPrecursorCreationService {
|
||||
} else {
|
||||
String section = precursorEntity.getManualOverwrite().getSection()
|
||||
.orElse(null);
|
||||
if ((section == null || section.isBlank())
|
||||
&& precursorEntity.getSection() != null
|
||||
&& !precursorEntity.getSection().isBlank()
|
||||
&& precursorEntity.getEngines().contains(Engine.IMPORTED)) {
|
||||
if ((section == null || section.isBlank()) && precursorEntity.getSection() != null && !precursorEntity.getSection().isBlank() && precursorEntity.getEngines()
|
||||
.contains(Engine.IMPORTED)) {
|
||||
section = precursorEntity.getSection();
|
||||
}
|
||||
|
||||
|
||||
@ -1,31 +1,47 @@
|
||||
package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public final class TextNormalizationUtilities {
|
||||
|
||||
/**
|
||||
* Revert hyphenation due to line breaks.
|
||||
*
|
||||
* @param text Text to be processed.
|
||||
* @return Text without line-break hyphenation.
|
||||
*/
|
||||
public static String removeHyphenLineBreaks(String text) {
|
||||
public static final Pattern hyphenLineBreaks = Pattern.compile("[-~‐‒⁻−﹣゠⁓‑\\u00AD][\\r\\n]+");
|
||||
public static final Pattern linebreaks = Pattern.compile("[\\r\\n]+");
|
||||
public static final Pattern doubleWhitespaces = Pattern.compile("\\s{2,}");
|
||||
public static final Pattern WHITESPACE_REMOVAL = Pattern.compile("\\s+");
|
||||
|
||||
return text.replaceAll("([^\\s\\d\\-]{2,500})[\\-\\u00AD]\\R", "$1");
|
||||
|
||||
public String cleanString(String value) {
|
||||
|
||||
String noHyphenLinebreaks = removeHyphenLinebreaks(value);
|
||||
String noLinebreaks = removeLinebreaks(noHyphenLinebreaks);
|
||||
return removeMultipleWhitespaces(noLinebreaks);
|
||||
}
|
||||
|
||||
|
||||
public static String removeLineBreaks(String text) {
|
||||
public String removeHyphenLinebreaks(String value) {
|
||||
|
||||
return text.replaceAll("\n", " ");
|
||||
return hyphenLineBreaks.matcher(value).replaceAll("");
|
||||
}
|
||||
|
||||
|
||||
public static String removeRepeatingWhitespaces(String text) {
|
||||
private String removeMultipleWhitespaces(String value) {
|
||||
|
||||
return text.replaceAll(" {2}", " ");
|
||||
return doubleWhitespaces.matcher(value).replaceAll(" ");
|
||||
}
|
||||
|
||||
|
||||
private String removeLinebreaks(String value) {
|
||||
|
||||
return linebreaks.matcher(value).replaceAll(" ");
|
||||
}
|
||||
|
||||
|
||||
public String removeAllWhitespaces(String value) {
|
||||
|
||||
return WHITESPACE_REMOVAL.matcher(value).replaceAll("");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -611,7 +611,7 @@ public abstract class AbstractRedactionIntegrationTest {
|
||||
|
||||
private String cleanDictionaryEntry(String entry) {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
return TextNormalizationUtilities.cleanString(entry);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -6,8 +6,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
@ -37,7 +35,6 @@ import org.junit.jupiter.api.Order;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.TestMethodOrder;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.Mock;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
@ -83,7 +80,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.rules.RulesIntegrationTest;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.DocumentDataFallbackService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||
@ -102,7 +98,6 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@Import({LayoutParsingServiceProcessorConfiguration.class})
|
||||
@ -1233,6 +1228,44 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
public void testImportedRedactions2() throws IOException {
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/ImportedRedactions.pdf";
|
||||
ClassPathResource importedRedactions = new ClassPathResource("files/ImportedRedactions/76c5683ebc8c19dc23eccea12dfc652b.IMPORTED_REDACTIONS.json");
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/76c5683ebc8c19dc23eccea12dfc652b.ORIGIN.pdf");
|
||||
storageService.storeObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS),
|
||||
importedRedactions.getInputStream());
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE_OLD, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
entityLog.getEntityLogEntry()
|
||||
.forEach(entry -> {
|
||||
if (entry.getValue() == null) {
|
||||
return;
|
||||
}
|
||||
if (entry.getValue().equals("David")) {
|
||||
assertThat(entry.getImportedRedactionIntersections()).hasSize(1);
|
||||
}
|
||||
if (entry.getValue().equals("annotation")) {
|
||||
assertThat(entry.getImportedRedactionIntersections()).isEmpty();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testExpandByPrefixRegEx() throws IOException {
|
||||
|
||||
@ -2252,9 +2285,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
@Test
|
||||
public void testFileWithImagesAndNoText() {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/only_images.pdf",
|
||||
"files/cv_service_empty_response.json",
|
||||
"files/only_images_file_image_response.json");
|
||||
AnalyzeRequest request = prepareStorage("files/new/only_images.pdf", "files/cv_service_empty_response.json", "files/only_images_file_image_response.json");
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -11,10 +11,10 @@ public class TextNormalizationUtilitiesTest {
|
||||
public void testHyphenRemoval() {
|
||||
|
||||
String test = "Without these peo-\nple, this conference would not happen";
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test)).contains("people");
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLinebreaks(test)).contains("people");
|
||||
|
||||
test = "Die\t\nFreiwillige\t Versicherung\t endet\t zudem\t für\t den\t ein\u00AD\nzelnen\tVersicherten\tmit\tder\tAufhebung\tdes\tVertra-\nges,\t seiner\t Unterstellung\t unter\t die\t obligatorische\t\nVersicherung\t oder\t seinem\t Ausschluss.";
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test)).contains("einzelnen", "Vertrages");
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLinebreaks(test)).contains("einzelnen", "Vertrages");
|
||||
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user