RED-9148 - Add paragraphPageIdx to entity log

This commit is contained in:
Andrei Isvoran 2024-05-21 15:31:46 +03:00
parent a538fbbc22
commit ad8964f4b0
3 changed files with 69 additions and 5 deletions

View File

@ -16,7 +16,7 @@ val layoutParserVersion = "0.116.0"
// Version strings declared as build-script constants; presumably referenced by the
// dependency declarations further down the script (not visible in this hunk).
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
// NOTE(review): the two persistenceServiceVersion lines below appear to be the removed and
// added sides of this diff hunk (2.411.0 -> 2.420.0), not a genuine double declaration.
val persistenceServiceVersion = "2.411.0"
val persistenceServiceVersion = "2.420.0"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
val testContainersVersion = "1.19.7"

View File

@ -5,6 +5,7 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@ -34,6 +35,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.service.EntityChangeLogService.EntryChanges;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -210,6 +212,7 @@ public class EntityLogCreatorService {
.state(buildEntryState(image))
.entryType(isHint ? EntryType.IMAGE_HINT : EntryType.IMAGE)
.engines(getEngines(null, image.getManualOverwrite()))
.paragraphPageIdx(-1)
.build();
}
@ -250,6 +253,7 @@ public class EntityLogCreatorService {
.imported(false)
.reference(Collections.emptySet())
.manualChanges(ManualChangeFactory.toLocalManualChangeList(precursorEntity.getManualOverwrite().getManualChangeLog(), true))
.paragraphPageIdx(-1)
.build();
}
@ -262,7 +266,9 @@ public class EntityLogCreatorService {
.filter(TextEntity::active)
.forEach(ref -> ref.getPositionsOnPagePerPage()
.forEach(pos -> referenceIds.add(pos.getId())));
boolean isHint = isHint(entity.getEntityType());
EntryType entryType = buildEntryType(entity);
return EntityLogEntry.builder()
.reason(entity.buildReasonWithManualChangeDescriptions())
.legalBasis(entity.legalBasis())
@ -288,11 +294,37 @@ public class EntityLogCreatorService {
.reference(referenceIds)
.manualChanges(ManualChangeFactory.toLocalManualChangeList(entity.getManualOverwrite().getManualChangeLog(), true))
.state(buildEntryState(entity))
.entryType(buildEntryType(entity))
.entryType(entryType)
.paragraphPageIdx(determinePageParagraphIndex(entity, entryType))
.build();
}
/**
 * Determines the value stored as {@code paragraphPageIdx} for a text entity's log entry.
 *
 * <p>Entries of type {@code IMAGE}, {@code IMAGE_HINT} or {@code AREA} always yield {@code -1}.
 * For every other entry type, the first leaf node among the entity's intersecting nodes is
 * looked up, and the number-on-page of the first atomic text block of that node's leaf text
 * block is returned. If there is no intersecting leaf node, or that node has no atomic text
 * block, the result is {@code -1}.
 *
 * @param textEntity the entity whose intersecting nodes are inspected
 * @param entryType the entry type already computed for {@code textEntity}
 * @return the number-on-page of the first atomic text block found, or {@code -1} if none applies
 */
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {

    if (entryType.equals(EntryType.IMAGE) || entryType.equals(EntryType.IMAGE_HINT) || entryType.equals(EntryType.AREA)) {
        return -1;
    }

    // Only the first leaf node is considered; later leaf nodes are never consulted,
    // even when the first one carries no atomic text block.
    return textEntity.getIntersectingNodes()
            .stream()
            .filter(SemanticNode::isLeaf)
            .findFirst()
            .flatMap(leafNode -> leafNode.getLeafTextBlock().getAtomicTextBlocks()
                    .stream()
                    .findFirst())
            .map(AtomicTextBlock::getNumberOnPage)
            .orElse(-1);
}
private Set<Engine> getEngines(Set<Engine> currentEngines, ManualChangeOverwrite manualChangeOverwrite) {
Set<Engine> engines = currentEngines != null ? new HashSet<>(currentEngines) : new HashSet<>();
@ -360,7 +392,9 @@ public class EntityLogCreatorService {
.collect(Collectors.toList());
}
/**
 * Builds a section label of the form {@code "<node type>: <text block summary>"}.
 *
 * @param node the node whose type and text block summary are combined
 * @return the node type's string form, followed by {@code ": "} and the summary of the node's text block
 */
private String buildSectionString(SemanticNode node) {

    final String nodeTypeLabel = node.getType().toString();
    return nodeTypeLabel + ": " + node.getTextBlock().buildSummary();
}

View File

@ -1465,6 +1465,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
.get().getState(), EntryState.REMOVED);
}
@Test
@SneakyThrows
public void testReAddingSameManualRedaction() {
@ -1512,7 +1513,6 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
.findFirst()
.get().getState(), EntryState.REMOVED);
manualRedactionEntry.setRequestDate(OffsetDateTime.now());
idRemoval.setProcessedDate(OffsetDateTime.now());
request.setManualRedactions(ManualRedactions.builder().entriesToAdd(Set.of(manualRedactionEntry)).idsToRemove(Set.of(idRemoval)).build());
@ -1531,6 +1531,7 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
.get().getState(), EntryState.APPLIED);
}
@Test
@SneakyThrows
public void testRemoveRectangleRedaction() {
@ -1680,7 +1681,8 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
assertEquals(resizedEntity.getState(), EntryState.APPLIED);
assertEquals(resizedEntity.getValue(), "David");
assertEquals(1, resizedEntity.getManualChanges().size());
assertEquals(resizedEntity.getManualChanges().get(0).getManualRedactionType(), ManualRedactionType.RESIZE);
assertEquals(resizedEntity.getManualChanges()
.get(0).getManualRedactionType(), ManualRedactionType.RESIZE);
}
@ -1802,6 +1804,34 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
}
/**
 * Analyzes the crafted test document and checks the {@code paragraphPageIdx} written to the
 * entity log for two known values: "David Ksenia" is expected at index 5 and "Netherlands"
 * at index 3.
 */
@Test
public void testPageParagraphIndex() {

    AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
    System.out.println("Start Full integration test");
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    System.out.println("Finished structure analysis");
    analyzeService.analyze(request);
    System.out.println("Finished analysis");

    var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    // Constant-first equals keeps the lookup from failing with an NPE on entries without a value.
    var davidKsenia = entityLog.getEntityLogEntry()
            .stream()
            .filter(e -> "David Ksenia".equals(e.getValue()))
            .findFirst();
    assertTrue(davidKsenia.isPresent());
    // Expected value first, so a failure message reports expected/actual the right way round.
    assertEquals(5, davidKsenia.get().getParagraphPageIdx());

    var netherlands = entityLog.getEntityLogEntry()
            .stream()
            .filter(e -> "Netherlands".equals(e.getValue()))
            .findFirst();
    assertTrue(netherlands.isPresent());
    assertEquals(3, netherlands.get().getParagraphPageIdx());
}
private IdRemoval getIdRemoval(String id) {
return IdRemoval.builder()