RED-3553 If an image overlaps an entity, the entity will be removed

This commit is contained in:
Philipp Schramm 2022-03-17 07:46:11 +01:00
parent c8cf0078eb
commit 1f6a9aa14d
2 changed files with 65 additions and 25 deletions

View File

@ -61,6 +61,8 @@ public class EntityRedactionService {
}
Map<Integer, List<Entity>> entitiesPerPage = convertToEntitiesPerPage(entities);
EntitySearchUtils.removeEntitiesContainedInRedactedLogos(imagesPerPage, entitiesPerPage);
return new PageEntities(entitiesPerPage, imagesPerPage);
}

View File

@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.experimental.UtilityClass;
@ -54,8 +55,8 @@ public class EntitySearchUtils {
}
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine, boolean ignoreMinLength) {
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
Engine engine, boolean ignoreMinLength) {
Set<Entity> found = new HashSet<>();
@ -98,13 +99,10 @@ public class EntitySearchUtils {
for (String word : entitiesByWord.keySet()) {
List<Entity> orderedEntities = entitiesByWord.get(word)
.stream()
.sorted(Comparator.comparing(Entity::getStart))
.collect(Collectors.toList());
List<Entity> orderedEntities = entitiesByWord.get(word).stream().sorted(Comparator.comparing(Entity::getStart)).collect(Collectors.toList());
Entity firstEntity = orderedEntities.get(0);
List<EntityPositionSequence> positionSequences = text.getSequences(firstEntity.getWord()
.trim(), dictionary == null ? true : dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences());
.trim(), dictionary == null || dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences());
for (int i = 0; i <= orderedEntities.size() - 1; i++) {
try {
@ -129,7 +127,7 @@ public class EntitySearchUtils {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
// FIXME this is workaround for RED-3327 and should be removed in the future.
if(word.getType().contains("recommendation_") && !inner.getType().contains("recommendation_")) {
if (word.getType().contains("recommendation_") && !inner.getType().contains("recommendation_")) {
wordsToRemove.add(word);
} else {
wordsToRemove.add(inner);
@ -137,7 +135,35 @@ public class EntitySearchUtils {
}
}
}
entities.removeAll(wordsToRemove);
wordsToRemove.forEach(entities::remove);
}
/**
 * Drops, page by page, every entity that {@code isImageOverlappingEntity} reports as overlapped by an image
 * flagged as a redaction ({@code image.isRedaction()}).
 * <p>
 * The lists held in {@code entitiesPerPage} are modified in place. Pages that have images but no entry in
 * {@code entitiesPerPage} are skipped. If either map is {@code null} the method does nothing.
 *
 * @param imagesPerPage   images grouped by page number
 * @param entitiesPerPage entities grouped by page number; overlapped entities are removed from these lists
 */
public static void removeEntitiesContainedInRedactedLogos(Map<Integer, Set<Image>> imagesPerPage, Map<Integer, List<Entity>> entitiesPerPage) {

    if (entitiesPerPage == null || imagesPerPage == null) {
        return;
    }

    for (Map.Entry<Integer, Set<Image>> imagesOfPage : imagesPerPage.entrySet()) {
        int pageNumber = imagesOfPage.getKey();
        if (!entitiesPerPage.containsKey(pageNumber)) {
            continue;
        }

        List<Entity> entitiesOfPage = entitiesPerPage.get(pageNumber);
        // Collect first and remove afterwards, so the entity list is never modified while it is being iterated.
        Set<Entity> overlapped = new HashSet<>();

        for (Image image : imagesOfPage.getValue()) {
            if (!image.isRedaction()) {
                continue;
            }
            for (Entity entity : entitiesOfPage) {
                if (isImageOverlappingEntity(image, entity)) {
                    log.info("Logo overlaps entity and entity will be removed");
                    overlapped.add(entity);
                }
            }
        }

        entitiesOfPage.removeAll(overlapped);
    }
}
@ -175,22 +201,17 @@ public class EntitySearchUtils {
}
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded) {
private boolean isImageOverlappingEntity(Image image, Entity entity) {
for (Entity toAdd : toBeAdded) {
if (existing.contains(toAdd)) {
Optional<Entity> existingOptional = existing.stream()
.filter(entity -> entity.equals(toAdd))
.findFirst();
if (!existingOptional.isPresent()) {
return;
}
var existingEntity = existingOptional.get();
existingEntity.getEngines().addAll(toAdd.getEngines());
} else {
existing.add(toAdd);
}
}
return image.getPosition() != null && entity.getPositionSequences() != null && entity.getPositionSequences().get(0) != null && entity.getPositionSequences()
.get(0)
.getSequences() != null && entity.getPositionSequences().get(0).getSequences().get(0) != null && image.getPosition().getX() < entity.getPositionSequences()
.get(0)
.getSequences()
.get(0)
.getX1() && image.getPosition().getX() + image.getPosition().getWidth() > entity.getPositionSequences().get(0).getSequences().get(0).getX2() && image.getPosition()
.getY() < entity.getPositionSequences().get(0).getSequences().get(0).getY1() && image.getPosition().getY() + image.getPosition()
.getHeight() > entity.getPositionSequences().get(0).getSequences().get(0).getY2();
}
@ -210,11 +231,28 @@ public class EntitySearchUtils {
}
/**
 * Merges {@code toBeAdded} into {@code existing}.
 * <p>
 * An entity that is not yet contained in {@code existing} is added as is. For an entity that is already
 * contained, only its engines are added to the engines of the equal entity already stored in the set.
 *
 * @param existing  the set that is updated in place
 * @param toBeAdded the entities to merge into {@code existing}
 */
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded) {

    for (Entity toAdd : toBeAdded) {
        // Set.add returns true only if the element was not present before, so this is the "new entity" case.
        if (existing.add(toAdd)) {
            continue;
        }

        // An equal entity is already stored: merge the engines into the stored instance.
        // If no stored instance can be located, skip only this entity instead of aborting the whole merge,
        // so the remaining entities in toBeAdded are still processed.
        existing.stream()
                .filter(entity -> entity.equals(toAdd))
                .findFirst()
                .ifPresent(stored -> stored.getEngines().addAll(toAdd.getEngines()));
    }
}
private boolean overlaps(Set<Entity> existingEntities, Entity found) {
for (Entity existing : existingEntities) {
if(existing.getStart().equals(found.getStart())){
if (existing.getStart().equals(found.getStart())) {
continue;
}