RED-871: Fixed endless processing of documents with a corrupted contentStream

This commit is contained in:
deiflaender 2020-12-11 11:26:48 +01:00
parent 50ec16601c
commit bfa363a3d2
2 changed files with 18 additions and 1 deletion

View File

@ -188,7 +188,13 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
String[] lines = value.split("\n");
for (String line : lines) {
if (line.trim().length() <= 2) {
return;
}
Set<Entity> found = findEntities(line.trim(), asType, false);
// HashSet keeps the older value, but we want the new only.
@ -216,6 +222,10 @@ public class Section {
private Set<Entity> findEntities(String value, String asType, boolean caseinsensitive) {
if (value.trim().length() <= 2) {
return new HashSet<>();
}
Set<Entity> found = new HashSet<>();
String text = caseinsensitive ? searchText.toLowerCase() : searchText;
@ -294,7 +304,8 @@ public class Section {
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
entity.setTargetSequences(value.getTextBlocks().get(0)
entity.setTargetSequences(value.getTextBlocks()
.get(0)
.getSequences()); // Make sure no other cells with same content are highlighted
entity.setLegalBasis(legalBasis);

View File

@ -246,7 +246,13 @@ public class EntityRedactionService {
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean local) {
Set<Entity> found = new HashSet<>();
for (String value : values) {
if(value.trim().length() <= 2) {
continue;
}
int startIndex;
int stopIndex = 0;
do {