Merge branch 'RED-7384' into 'master'

RED-7384: additional fixes for migration

Closes RED-7384

See merge request redactmanager/redaction-service!234
This commit is contained in:
Kilian Schüttler 2023-12-20 12:38:25 +01:00
commit 2423989192
14 changed files with 183 additions and 51 deletions

View File

@ -58,6 +58,8 @@ public class RedactionLogToEntityLogMigrationService {
List<MigrationEntity> entitiesToMigrate = calculateMigrationEntitiesFromRedactionLog(redactionLog, document);
MigratedIds migratedIds = entitiesToMigrate.stream().collect(new MigratedIdsCollector());
MigratedIds idsToMigrateInDb = entitiesToMigrate.stream().filter(MigrationEntity::hasManualChangesOrComments).collect(new MigratedIdsCollector());
EntityLog entityLog = new EntityLog();
entityLog.setAnalysisNumber(redactionLog.getAnalysisNumber());
@ -79,7 +81,7 @@ public class RedactionLogToEntityLogMigrationService {
throw new AssertionError(message);
}
return new MigratedEntityLog(migratedIds, entityLog);
return new MigratedEntityLog(idsToMigrateInDb, entityLog);
}
@ -87,7 +89,7 @@ public class RedactionLogToEntityLogMigrationService {
return redactionLog.getRedactionLogEntry()
.stream()
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges()
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges() == null || redactionLogEntry.getManualChanges()
.stream()
.allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED)))
.count();
@ -115,7 +117,7 @@ public class RedactionLogToEntityLogMigrationService {
List<RedactionLogEntry> redactionLogImages = redactionLog.getRedactionLogEntry()
.stream()
.filter(RedactionLogEntry::isImage)
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges()
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges() == null || redactionLogEntry.getManualChanges()
.stream()
.allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED)))
.toList();
@ -168,17 +170,19 @@ public class RedactionLogToEntityLogMigrationService {
return image;
}
/**
 * Converts the given {@link Rectangle} into a {@link Rectangle2D.Double}.
 * The X/Y of the point returned by {@code getTopLeft()} become the origin, width and height are copied as-is.
 *
 * @param rect the rectangle to convert
 * @return a new {@link Rectangle2D} with the same origin and dimensions
 */
private static Rectangle2D toRectangle2D(Rectangle rect) {
    var origin = rect.getTopLeft();
    return new Rectangle2D.Double(origin.getX(), origin.getY(), rect.getWidth(), rect.getHeight());
}
private List<MigrationEntity> getTextBasedMigrationEntities(RedactionLog redactionLog, Document document) {
List<MigrationEntity> entitiesToMigrate = redactionLog.getRedactionLogEntry()
.stream()
.filter(redactionLogEntry -> !redactionLogEntry.isImage())
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges()
.filter(redactionLogEntry -> redactionLogEntry.getManualChanges() == null || redactionLogEntry.getManualChanges()
.stream()
.allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED)))
.map(MigrationEntity::fromRedactionLogEntry)

View File

@ -165,11 +165,7 @@ public final class MigrationEntity {
throw new UnsupportedOperationException("Unknown subclass " + migratedEntity.getClass());
}
entityLogEntry.setChanges(redactionLogEntry.getChanges().stream().map(MigrationEntity::toEntityLogChanges).toList());
entityLogEntry.setManualChanges(redactionLogEntry.getManualChanges()
.stream()
.filter(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED))
.map(MigrationEntity::toEntityLogManualChanges)
.toList());
entityLogEntry.setManualChanges(migrateManualChanges(redactionLogEntry.getManualChanges()));
entityLogEntry.setColor(redactionLogEntry.getColor());
entityLogEntry.setReference(migrateSetOfIds(redactionLogEntry.getReference(), oldToNewIdMapping));
entityLogEntry.setImportedRedactionIntersections(migrateSetOfIds(redactionLogEntry.getImportedRedactionIntersections(), oldToNewIdMapping));
@ -182,6 +178,18 @@ public final class MigrationEntity {
}
/**
 * Maps the manual changes of a redaction log entry to their entity log representation.
 * Only manual changes whose annotation status equals {@code AnnotationStatus.APPROVED} are migrated.
 *
 * @param manualChanges the manual changes to migrate, may be {@code null}
 * @return the migrated manual changes, or an empty list if {@code manualChanges} is {@code null}
 */
private List<com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange> migrateManualChanges(List<ManualChange> manualChanges) {
    if (manualChanges != null) {
        return manualChanges.stream()
                .filter(change -> change.getAnnotationStatus().equals(AnnotationStatus.APPROVED))
                .map(MigrationEntity::toEntityLogManualChanges)
                .toList();
    }
    // A missing list is treated like "no manual changes".
    return Collections.emptyList();
}
private static Set<com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine> getMigratedEngines(RedactionLogEntry entry) {
if (entry.getEngines() == null) {
@ -328,4 +336,11 @@ public final class MigrationEntity {
throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass()));
}
/**
 * Checks whether the underlying redaction log entry carries any manual changes or comments.
 * A {@code null} collection is treated the same as an empty one.
 *
 * @return true, if the entry has at least one manual change or at least one comment
 */
public boolean hasManualChangesOrComments() {
    var manualChanges = redactionLogEntry.getManualChanges();
    if (manualChanges != null && !manualChanges.isEmpty()) {
        return true;
    }
    // Comments are only inspected when there are no manual changes.
    var comments = redactionLogEntry.getComments();
    return comments != null && !comments.isEmpty();
}
}

View File

@ -1,7 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@ -36,6 +38,9 @@ public class Footer implements GenericSemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
@Override
public NodeType getType() {
@ -71,4 +76,14 @@ public class Footer implements GenericSemanticNode {
return treeId + ": " + NodeType.FOOTER + ": " + leafTextBlock.buildSummary();
}
/**
 * Returns the bounding box of this Footer per page.
 * The map is computed by the default implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = GenericSemanticNode.super.getBBox();
    return bBoxCache;
}
}

View File

@ -1,7 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@ -36,6 +38,9 @@ public class Header implements GenericSemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
@Override
public boolean isLeaf() {
@ -71,4 +76,14 @@ public class Header implements GenericSemanticNode {
return treeId + ": " + NodeType.HEADER + ": " + leafTextBlock.buildSummary();
}
/**
 * Returns the bounding box of this Header per page.
 * The map is computed by the default implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = GenericSemanticNode.super.getBBox();
    return bBoxCache;
}
}

View File

@ -1,7 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@ -35,6 +37,9 @@ public class Headline implements GenericSemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
@Override
public NodeType getType() {
@ -71,6 +76,16 @@ public class Headline implements GenericSemanticNode {
}
/**
 * Returns the bounding box of this Headline per page.
 * The map is computed by the default implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = GenericSemanticNode.super.getBBox();
    return bBoxCache;
}
@Override
public SectionIdentifier getSectionIdentifier() {

View File

@ -1,7 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@ -31,6 +33,9 @@ public class Paragraph implements GenericSemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
@Override
public NodeType getType() {
@ -59,4 +64,14 @@ public class Paragraph implements GenericSemanticNode {
return treeId + ": " + NodeType.PARAGRAPH + ": " + leafTextBlock.buildSummary();
}
/**
 * Returns the bounding box of this Paragraph per page.
 * The map is computed by the default implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = GenericSemanticNode.super.getBBox();
    return bBoxCache;
}
}

View File

@ -1,7 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@ -34,6 +36,9 @@ public class Section implements GenericSemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
@Override
public NodeType getType() {
@ -92,4 +97,14 @@ public class Section implements GenericSemanticNode {
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
}
/**
 * Returns the bounding box of this Section per page.
 * The map is computed by the default implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = GenericSemanticNode.super.getBBox();
    return bBoxCache;
}
}

View File

@ -61,6 +61,7 @@ public interface SemanticNode {
return getTextBlock().getPages();
}
/**
* Finds the first page associated with this Node.
*
@ -272,11 +273,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
.map(TextEntity::getType)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types).toList());
return getEntities().stream().filter(TextEntity::active).map(TextEntity::getType).collect(Collectors.toUnmodifiableSet()).containsAll(Arrays.stream(types).toList());
}
@ -433,6 +430,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains exactly the provided String as a word.
*
* @param word - String which the TextBlock might contain
* @return true, if this node's TextBlock contains string
*/
@ -444,6 +442,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains exactly the provided String as a word ignoring case.
*
* @param word - String which the TextBlock might contain
* @return true, if this node's TextBlock contains string
*/
@ -455,6 +454,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains any of the provided Strings as a word.
*
* @param words - A List of Strings which the TextBlock might contain
* @return true, if this node's TextBlock contains any of the provided strings
*/
@ -466,6 +466,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
*
* @param words - A List of Strings which the TextBlock might contain
* @return true, if this node's TextBlock contains any of the provided strings
*/
@ -477,6 +478,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains all the provided Strings as word.
*
* @param words - A List of Strings which the TextBlock might contain
* @return true, if this node's TextBlock contains all the provided strings
*/
@ -488,6 +490,7 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode contains all the provided Strings as word ignoring case.
*
* @param words - A List of Strings which the TextBlock might contain
* @return true, if this node's TextBlock contains all the provided strings
*/
@ -496,6 +499,7 @@ public interface SemanticNode {
return Arrays.stream(words).map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
}
/**
* Checks whether this SemanticNode matches the provided regex pattern.
*
@ -522,10 +526,11 @@ public interface SemanticNode {
/**
* Checks whether this SemanticNode intersects the provided rectangle.
* @param x the lower left corner X value
* @param y the lower left corner Y value
* @param w width
* @param h height
*
* @param x the lower left corner X value
* @param y the lower left corner Y value
* @param w width
* @param h height
* @param pageNumber the pageNumber of the rectangle
* @return true if intersects, false otherwise
*/
@ -630,12 +635,11 @@ public interface SemanticNode {
*/
default Map<Page, Rectangle2D> getBBox() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
if (isLeaf()) {
return getBBoxFromLeafTextBlock(bBoxPerPage);
return getBBoxFromLeafTextBlock();
}
return getBBoxFromChildren(bBoxPerPage);
return getBBoxFromChildren();
}
@ -658,28 +662,35 @@ public interface SemanticNode {
/**
* TODO: this produces unwanted results for sections spanning multiple columns.
* Computes the Union of the bounding boxes of all children recursively.
*
* @param bBoxPerPage initial empty BoundingBox
* @return The union of the BoundingBoxes of all children
*/
private Map<Page, Rectangle2D> getBBoxFromChildren(Map<Page, Rectangle2D> bBoxPerPage) {
private Map<Page, Rectangle2D> getBBoxFromChildren() {
return streamChildren().map(SemanticNode::getBBox).reduce((map1, map2) -> {
map1.forEach((page, rectangle) -> map2.merge(page, rectangle, (rect1, rect2) -> rect1.createUnion(rect2).getBounds2D()));
return map2;
}).orElse(bBoxPerPage);
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox).toList();
Set<Page> pages = childrenBBoxes.stream().flatMap(map -> map.keySet().stream()).collect(Collectors.toSet());
for (Page page : pages) {
Rectangle2D bBoxOnPage = childrenBBoxes.stream()
.filter(childBboxPerPage -> childBboxPerPage.containsKey(page))
.map(childBboxPerPage -> childBboxPerPage.get(page))
.collect(RectangleTransformations.collectBBox());
bBoxPerPage.put(page, bBoxOnPage);
}
return bBoxPerPage;
}
/**
* @param bBoxPerPage initial empty BoundingBox
* @return The union of all BoundingBoxes of the TextBlock of this node
*/
private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock(Map<Page, Rectangle2D> bBoxPerPage) {
private Map<Page, Rectangle2D> getBBoxFromLeafTextBlock() {
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
Map<Page, List<AtomicTextBlock>> atomicTextBlockPerPage = getTextBlock().getAtomicTextBlocks().stream().collect(Collectors.groupingBy(AtomicTextBlock::getPage));
atomicTextBlockPerPage.forEach((page, atomicTextBlocks) -> bBoxPerPage.put(page, RectangleTransformations.atomicTextBlockBBox(atomicTextBlocks)));
return bBoxPerPage;
}
}
}

View File

@ -2,10 +2,12 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.IntStream;
import java.util.stream.Stream;
@ -40,6 +42,19 @@ public class Table implements SemanticNode {
@EqualsAndHashCode.Exclude
Set<TextEntity> entities = new HashSet<>();
@EqualsAndHashCode.Exclude
Map<Page, Rectangle2D> bBoxCache;
/**
 * Returns the bounding box of this Table per page.
 * The map is computed by the default {@link SemanticNode} implementation on the first call and stored in {@code bBoxCache}; subsequent calls return the stored map.
 *
 * @return the cached bounding box per page
 */
@Override
public Map<Page, Rectangle2D> getBBox() {
    if (bBoxCache != null) {
        return bBoxCache;
    }
    bBoxCache = SemanticNode.super.getBBox();
    return bBoxCache;
}
/**
* Streams all entities in this table, that appear in a row, which contains any of the provided strings.
@ -172,6 +187,7 @@ public class Table implements SemanticNode {
return streamChildrenOfType(NodeType.TABLE_CELL).map(node -> (TableCell) node);
}
/**
* Streams all TableCells that contain at least one entity of a given type in this Table row-wise.
*
@ -180,8 +196,7 @@ public class Table implements SemanticNode {
*/
public Stream<TableCell> streamTableCellsWhichContainType(String type) {
return streamTableCells()
.filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.getType().equals(type)));
return streamTableCells().filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.getType().equals(type)));
}
@ -267,7 +282,11 @@ public class Table implements SemanticNode {
*/
public boolean hasHeaderIgnoreCase(String header) {
return streamHeaders().anyMatch(tableCellNode -> tableCellNode.getTextBlock().getSearchText().strip().toLowerCase(Locale.ENGLISH).equals(header.toLowerCase(Locale.ENGLISH)));
return streamHeaders().anyMatch(tableCellNode -> tableCellNode.getTextBlock()
.getSearchText()
.strip()
.toLowerCase(Locale.ENGLISH)
.equals(header.toLowerCase(Locale.ENGLISH)));
}
@ -301,7 +320,7 @@ public class Table implements SemanticNode {
* Finds all entities of the provided type, which appear in the same row that the provided entity appears in.
* Ignores Entity with ignored == true or removed == true.
*
* @param type the type of entities to search for
* @param type the type of entities to search for
* @param textEntity the entity, which appears in the row to search
* @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
*/

View File

@ -60,7 +60,8 @@ public class ImportedRedactionService {
@Timed("redactmanager_processImportedRedactions")
public List<EntityLogEntry> processImportedEntities(String dossierTemplateId,
String dossierId,
String fileId, List<EntityLogEntry> entityLogEntries,
String fileId,
List<EntityLogEntry> entityLogEntries,
boolean addImportedRedactions) {
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(dossierId, fileId);
@ -108,7 +109,8 @@ public class ImportedRedactionService {
for (var importedRedaction : importedRedactionsValues) {
EntityLogEntry redactionLogEntry = EntityLogEntry.builder()
.id(importedRedaction.getId())
.type(IMPORTED_REDACTION_TYPE).entryType(EntryType.ENTITY)
.type(IMPORTED_REDACTION_TYPE)
.entryType(EntryType.ENTITY)
.imported(true)
.state(EntryState.APPLIED)
.positions(importedRedaction.getPositions())
@ -159,19 +161,19 @@ public class ImportedRedactionService {
}
private void addIntersections(EntityLogEntry redactionLogEntry, ImportedRedactions importedRedactions) {
private void addIntersections(EntityLogEntry entityLogEntry, ImportedRedactions importedRedactions) {
for (Position rectangle : redactionLogEntry.getPositions()) {
for (Position rectangle : entityLogEntry.getPositions()) {
var normalizedRectangle = normalize(rectangle);
if (importedRedactions.getImportedRedactions().containsKey(rectangle.getPageNumber())) {
var importedRedactionsOnPage = importedRedactions.getImportedRedactions().get(rectangle.getPageNumber());
for (ImportedRedaction importedRedaction : importedRedactionsOnPage) {
for (Position importedRedactionPosition : importedRedaction.getPositions()) {
if (rectOverlap(normalizedRectangle, normalize(importedRedactionPosition))) {
if (redactionLogEntry.getImportedRedactionIntersections() == null) {
redactionLogEntry.setImportedRedactionIntersections(new HashSet<>());
if (entityLogEntry.getImportedRedactionIntersections() == null) {
entityLogEntry.setImportedRedactionIntersections(new HashSet<>());
}
redactionLogEntry.getImportedRedactionIntersections().add(importedRedaction.getId());
entityLogEntry.getImportedRedactionIntersections().add(importedRedaction.getId());
}
}
}

View File

@ -70,6 +70,12 @@ public class RectangleTransformations {
}
/**
 * Creates a new {@link Rectangle2DBBoxCollector}, a {@link Collector} that reduces a stream of {@link Rectangle2D} to a single {@link Rectangle2D}.
 * NOTE(review): presumably the result is the bounding box enclosing all collected rectangles - confirm against Rectangle2DBBoxCollector.
 *
 * @return a new collector instance
 */
public static Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> collectBBox() {
return new Rectangle2DBBoxCollector();
}
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
@Override

View File

@ -1,5 +1,5 @@
server:
port: 8083
port: 8077
persistence-service.url: "http://localhost:8085"
tenant-user-management-service.url: "http://localhost:8091/internal"

View File

@ -92,7 +92,7 @@ public class MigrationIntegrationTest extends BuildDocumentIntegrationTest {
redactionStorageService.storeObject(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ENTITY_LOG, migratedEntityLog.getEntityLog());
assertEquals(redactionLog.getRedactionLogEntry().size(), migratedEntityLog.getEntityLog().getEntityLogEntry().size());
assertEquals(redactionLog.getRedactionLogEntry().size(), migratedEntityLog.getMigratedIds().getMappings().size());
assertEquals(redactionLog.getRedactionLogEntry().stream().filter(entry -> !entry.getManualChanges().isEmpty()).count(), migratedEntityLog.getMigratedIds().getMappings().size());
EntityLog entityLog = migratedEntityLog.getEntityLog();
assertEquals(redactionLog.getAnalysisNumber(), entityLog.getAnalysisNumber());
assertEquals(redactionLog.getAnalysisVersion(), entityLog.getAnalysisVersion());

View File

@ -16,8 +16,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode() {
var node = new Section(List.of(0, 1), null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null);
var node = new Section(List.of(0, 1), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -29,8 +29,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode2() {
var node = new Section(Collections.emptyList(), null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null);
var node = new Section(Collections.emptyList(), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -42,8 +42,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode3() {
var node = new Section(List.of(1, 5, 8), null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null);
var node = new Section(List.of(1, 5, 8), null, null, null, null);
var otherNode = new Section(List.of(0, 2), null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);
@ -55,8 +55,8 @@ class SemanticNodeComparatorsTest {
@Test
public void testFirstSemanticNode4() {
var node = new Section(List.of(1, 5, 8), null, null, null);
var otherNode = new Section(List.of(1, 5, 9), null, null, null);
var node = new Section(List.of(1, 5, 8), null, null, null, null);
var otherNode = new Section(List.of(1, 5, 9), null, null, null, null);
List<SemanticNode> list = new ArrayList<>();
list.add(otherNode);
list.add(node);