DM-483: endless loops with false positive #159

Closed
kilian.schuettler1 wants to merge 6 commits from DM-483 into master
12 changed files with 278 additions and 314 deletions

View File

@ -71,7 +71,6 @@ public class AnalyzeService {
ComponentLogCreatorService componentLogCreatorService;
RedactionStorageService redactionStorageService;
RedactionChangeLogService redactionChangeLogService;
EntityChangeLogService entityChangeLogService;
LegalBasisClient legalBasisClient;
RedactionServiceSettings redactionServiceSettings;
ImportedRedactionService importedRedactionService;

View File

@ -26,9 +26,14 @@ public class EntityChangeLogService {
@Timed("redactmanager_computeChanges")
public boolean computeChanges(List<EntityLogEntry> previousEntityLogEntries, List<EntityLogEntry> newEntityLogEntries, int analysisNumber) {
var now = OffsetDateTime.now();
if (previousEntityLogEntries.isEmpty()) {
newEntityLogEntries.forEach(entry -> entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, now)));
return true;
}
boolean hasChanges = false;
var now = OffsetDateTime.now();
for (EntityLogEntry entityLogEntry : newEntityLogEntries) {
Optional<EntityLogEntry> optionalPreviousEntity = previousEntityLogEntries.stream().filter(entry -> entry.getId().equals(entityLogEntry.getId())).findAny();
if (optionalPreviousEntity.isEmpty()) {
@ -46,11 +51,21 @@ public class EntityChangeLogService {
}
}
}
addRemovedEntriesAsRemoved(previousEntityLogEntries, newEntityLogEntries, analysisNumber, now);
return hasChanges;
}
/**
 * Flags every entry that exists in the previous entity log but is missing from the new one as removed
 * and appends it to the new entries.
 *
 * @param previousEntityLogEntries entries of the previous entity log
 * @param newEntityLogEntries      entries of the current analysis; the removed entries are appended to this
 *                                 list, so it has to be modifiable
 * @param analysisNumber           analysis number recorded on the added REMOVED change
 * @param now                      timestamp recorded on the added REMOVED change
 */
private static void addRemovedEntriesAsRemoved(List<EntityLogEntry> previousEntityLogEntries,
        List<EntityLogEntry> newEntityLogEntries,
        int analysisNumber,
        OffsetDateTime now) {

    // Ids still present after this analysis; anything else from the previous log has vanished.
    Set<String> existingIds = newEntityLogEntries.stream().map(EntityLogEntry::getId).collect(Collectors.toSet());
    List<EntityLogEntry> removedEntries = previousEntityLogEntries.stream().filter(entry -> !existingIds.contains(entry.getId())).toList();
    for (EntityLogEntry removedEntry : removedEntries) {
        removedEntry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, now));
        removedEntry.setState(EntryState.REMOVED);
    }
    // This method is void: the removed entries are handed back by appending them to the new list.
    newEntityLogEntries.addAll(removedEntries);
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.service;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
@ -12,8 +11,6 @@ import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
@ -33,21 +30,26 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Service
@Slf4j
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class EntityLogCreatorService {
private final DictionaryService dictionaryService;
private final ManualChangeFactory manualChangeFactory;
private final ImportedRedactionService importedRedactionService;
private final RedactionServiceSettings redactionServiceSettings;
private final LegalBasisClient legalBasisClient;
private final EntityChangeLogService entityChangeLogService;
DictionaryService dictionaryService;
ManualChangeFactory manualChangeFactory;
ImportedRedactionService importedRedactionService;
RedactionServiceSettings redactionServiceSettings;
LegalBasisClient legalBasisClient;
EntityChangeLogService entityChangeLogService;
RedactionStorageService redactionStorageService;
private static boolean notFalsePositiveOrFalseRecommendation(TextEntity textEntity) {
@ -65,7 +67,6 @@ public class EntityLogCreatorService {
List<EntityLogEntry> entityLogEntries = createEntityLogEntries(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
EntityLog entityLog = new EntityLog(redactionServiceSettings.getAnalysisVersion(),
analyzeRequest.getAnalysisNumber(),
entityLogEntries,
@ -82,13 +83,23 @@ public class EntityLogCreatorService {
true);
entityLog.setEntityLogEntry(importedRedactionFilteredEntries);
var now = OffsetDateTime.now();
entityLogEntries.forEach(entry -> entry.getChanges().add(new Change(analyzeRequest.getAnalysisNumber(), ChangeType.ADDED, now)));
List<EntityLogEntry> previousExistingEntityLogEntries = getPreviousEntityLogEntries(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
entityChangeLogService.computeChanges(previousExistingEntityLogEntries, entityLogEntries, analyzeRequest.getAnalysisNumber());
excludeExcludedPages(entityLog, analyzeRequest.getExcludedPages());
return entityLog;
}
/**
 * Loads the entries of the entity log stored for the given file.
 *
 * @param dossierId id of the dossier the file belongs to
 * @param fileId    id of the file
 * @return the stored entries, or an empty immutable list if no entity log (or no entry list) is available
 */
private List<EntityLogEntry> getPreviousEntityLogEntries(String dossierId, String fileId) {

    if (!redactionStorageService.entityLogExists(dossierId, fileId)) {
        return Collections.emptyList();
    }
    // The storage read may still yield null (e.g. the object is missing despite the existence check),
    // so guard against it instead of dereferencing blindly.
    EntityLog previousEntityLog = redactionStorageService.getEntityLog(dossierId, fileId);
    if (previousEntityLog == null || previousEntityLog.getEntityLogEntry() == null) {
        return Collections.emptyList();
    }
    return previousEntityLog.getEntityLogEntry();
}
public EntityLogChanges updateVersionsAndReturnChanges(EntityLog entityLog, DictionaryVersion dictionaryVersion, String dossierTemplateId, boolean hasChanges) {
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(dossierTemplateId);
@ -207,8 +218,7 @@ public class EntityLogCreatorService {
boolean isHint = isHint(manualEntity.getEntityType());
return EntityLogEntry.builder().id(manualEntity.getId()).color(getColor(type, dossierTemplateId, manualEntity.applied(), isHint))
.reason(manualEntity.buildReasonWithManualChangeDescriptions())
.legalBasis(manualEntity.legalBasis())
.value(manualEntity.value()).type(type).state(buildEntryState(manualEntity)).entryType(buildEntryType(manualEntity))
.legalBasis(manualEntity.legalBasis()).value(manualEntity.value()).type(type).state(buildEntryState(manualEntity)).entryType(buildEntryType(manualEntity))
.section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection()))
.containingNodeId(Collections.emptyList())
.closestHeadline("")

View File

@ -36,6 +36,7 @@ import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -250,20 +251,66 @@ public class EntityCreationService {
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()).stream().filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
.map(bounds -> byTextRange(bounds, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Convenience variant of {@code byTextRangeWithEngine} that always tags the entity with {@code Engine.RULE}.
 *
 * @param textRange  The boundary of the redaction entity.
 * @param type       The type of the redaction entity.
 * @param entityType The entity type of the redaction entity.
 * @param node       The semantic node to associate with the redaction entity.
 * @return whatever {@code byTextRangeWithEngine} returns for the given arguments and the RULE engine.
 */
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {

    Set<Engine> ruleEngineOnly = Set.of(Engine.RULE);
    return byTextRangeWithEngine(textRange, type, entityType, node, ruleEngineOnly);
}
/**
 * Creates a redaction entity based on the given boundary, type, entity type, and semantic node and tags it
 * with the given engines.
 * If the node already contains an equal entity, the engines are added to that existing entity and it is
 * returned instead. A newly created entity is added to the graph and inserted into the kieSession.
 *
 * @param textRange  The boundary of the redaction entity; it is trimmed against the node's text block.
 * @param type       The type of the redaction entity.
 * @param entityType The entity type of the redaction entity.
 * @param node       The semantic node to associate with the redaction entity.
 * @param engines    The engines to add to the new or already existing entity.
 * @return An Optional containing the new entity, or the already existing equal entity.
 * @throws IllegalArgumentException if the text range is not contained in the text range of the node.
 */
public Optional<TextEntity> byTextRangeWithEngine(TextRange textRange, String type, EntityType entityType, SemanticNode node, Set<Engine> engines) {

    if (!node.getTextRange().contains(textRange)) {
        throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
    }
    TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
    TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
    if (node.getEntities().contains(entity)) {
        // Adding the engines is required behaviour, so do it explicitly rather than via Stream.peek,
        // which is intended for debugging only.
        Optional<TextEntity> existingEntity = node.getEntities().stream().filter(entity::equals).findAny();
        existingEntity.ifPresent(existing -> existing.addEngines(engines));
        return existingEntity;
    }
    addEntityToGraph(entity, node);
    entity.addEngines(engines);
    insertToKieSession(entity);
    return Optional.of(entity);
}
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
@ -275,29 +322,9 @@ public class EntityCreationService {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, textBlock)
.stream().map(boundary -> toLineAfterTextRange(textBlock, boundary)).filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
@ -520,31 +547,29 @@ public class EntityCreationService {
}
/**
* Creates a redaction entity based on the given boundary, type, entity type, and semantic node.
* If the document already contains an equal redaction entity, then the original Entity is returned.
* Also inserts the Entity into the kieSession.
*
* @param textRange The boundary of the redaction entity.
* @param type The type of the redaction entity.
* @param entityType The entity type of the redaction entity.
* @param node The semantic node to associate with the redaction entity.
* @return An Optional containing the redaction entity, or the previous entity if the entity already exists.
*/
public Optional<TextEntity> byTextRange(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
if (!node.getTextRange().contains(textRange)) {
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
}
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType);
if (node.getEntities().contains(entity)) {
return node.getEntities().stream().filter(entity::equals).peek(e -> e.addEngine(Engine.RULE)).findAny();
}
addEntityToGraph(entity, node);
entity.addEngine(Engine.RULE);
insertToKieSession(entity);
return Optional.of(entity);
TextBlock textBlock = node.getTextBlock();
return RedactionSearchUtility.findTextRangesByString(string, textBlock)
.stream()
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
.filter(Optional::isPresent)
.map(Optional::get);
}
/**
 * Finds every case-insensitive occurrence of the given string in the node's text block, maps each match
 * through {@code toLineAfterTextRange}, and creates an entity for every resulting range that is valid.
 *
 * @param string     the string to search for, ignoring case
 * @param type       type assigned to each created entity
 * @param entityType entity type assigned to each created entity
 * @param node       semantic node whose text block is searched
 * @return a stream of the entities produced by {@code byTextRange}
 */
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {

    TextBlock searchedText = node.getTextBlock();
    return RedactionSearchUtility.findTextRangesByStringIgnoreCase(string, searchedText)
            .stream()
            .map(match -> toLineAfterTextRange(searchedText, match))
            .filter(lineAfter -> isValidEntityTextRange(searchedText, lineAfter))
            .flatMap(lineAfter -> byTextRange(lineAfter, type, entityType, node).stream());
}
@ -600,15 +625,14 @@ public class EntityCreationService {
return newEntity;
}
public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
TextEntity newEntity = TextEntity.initialEntityNode(entity.getTextRange(), type, entityType);
newEntity.addEngines(entity.getEngines());
TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines()).orElseThrow(() -> new NotFoundException(
"No entity present!"));
newEntity.getManualOverwrite().addChanges(entity.getManualOverwrite().getManualChangeLog());
newEntity.setDictionaryEntry(entity.isDictionaryEntry());
newEntity.setDossierDictionaryEntry(entity.isDossierDictionaryEntry());
addEntityToGraph(newEntity, node);
insertToKieSession(newEntity);
return newEntity;
}
@ -623,27 +647,23 @@ public class EntityCreationService {
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
var entity = forceByTextRange(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode);
entity.addEngine(Engine.NER);
insertToKieSession(entity);
return entity;
return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException(
"No entity present!"));
}
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
var entity = forceByTextRange(nerEntity.textRange(), type, entityType, semanticNode);
entity.addEngine(Engine.NER);
insertToKieSession(entity);
return entity;
return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException("No entity present!"));
}
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities).map(boundary -> forceByTextRange(boundary, type, entityType, semanticNode))
.peek(entity -> entity.addEngine(Engine.NER))
.peek(this::insertToKieSession);
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Set.of(Engine.NER)))
.filter(Optional::isPresent)
.map(Optional::get);
}

View File

@ -86,6 +86,7 @@ public class RedactionStorageService {
}
@Deprecated(forRemoval = true)
@Timed("redactmanager_getRedactionLog")
public RedactionLog getRedactionLog(String dossierId, String fileId) {
@ -104,7 +105,7 @@ public class RedactionStorageService {
try {
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("RedactionLog not available.");
log.debug("EntityLog not available.");
return null;
}
@ -160,6 +161,12 @@ public class RedactionStorageService {
}
/**
 * Checks whether an entity log is stored for the given file.
 *
 * @param dossierId id of the dossier the file belongs to
 * @param fileId    id of the file
 * @return {@code true} if the storage contains an object under the file's ENTITY_LOG storage id
 */
public boolean entityLogExists(String dossierId, String fileId) {

    // Must use the same file type the entity log is read with (ENTITY_LOG), not COMPONENT_LOG.
    return storageService.objectExists(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG));
}
@RequiredArgsConstructor
public enum StorageType {
PARSED_DOCUMENT(".json");

View File

@ -12,7 +12,7 @@ import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import lombok.AllArgsConstructor;
@ -26,18 +26,9 @@ public class RectangleTransformations {
}
public static Rectangle2D rectangleBBox(List<Rectangle> rectangles) {
public static Rectangle2D rectangleBBox(List<Position> positions) {
return rectangles.stream().map(RectangleTransformations::toRectangle2D).collect(new Rectangle2DBBoxCollector());
}
public static Rectangle2D toRectangle2D(Rectangle redactionLogRectangle) {
return new Rectangle2D.Double(redactionLogRectangle.getTopLeft().getX(),
redactionLogRectangle.getTopLeft().getY() + redactionLogRectangle.getHeight(),
redactionLogRectangle.getWidth(),
-redactionLogRectangle.getHeight());
return positions.stream().map(Position::toRectangle2D).collect(new Rectangle2DBBoxCollector());
}

View File

@ -44,21 +44,29 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_components.drl");
/**
 * Runs before each test: loads the test dictionary and then mocks the dictionary calls with {@code 0L}.
 */
@BeforeEach
public void setUpDictionaries() {

    final long mockArgument = 0L;
    loadDictionaryForTest();
    mockDictionaryCalls(mockArgument);
}
@Test
// @Disabled
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A8591B/15-Curacron_ToxicidadeAgudaOral.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study-ocred.pdf");
// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf",
// "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
@ -84,7 +92,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
@ -104,14 +112,14 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
// Fix In BodyTextFrameService destroys header detection in files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf
// TODO unify logic
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study-ocred.pdf");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());

View File

@ -19,6 +19,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
@ -44,6 +45,9 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequ
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Comment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
@ -59,7 +63,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
@ -161,11 +164,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
Map<String, List<EntityLogEntry>> duplicates = new HashMap<>();
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
entityLog.getEntityLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
@ -202,9 +205,9 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var values = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).collect(Collectors.toList());
var values = redactionLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList());
assertThat(values).containsExactlyInAnyOrder("Lastname M.", "Doe", "Doe J.", "M. Mustermann", "Mustermann M.", "F. Lastname");
}
@ -213,13 +216,13 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
@ -257,7 +260,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var toRemove = IdRemoval.builder()
.annotationId("c630599611e6e3db314518374bcf70f7")
@ -272,13 +275,13 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
request.setManualRedactions(manualRedactions);
analyzeService.reanalyze(request);
var mergedRedactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var mergedEntityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var cbiAddressBeforeHintRemoval = redactionLog.getRedactionLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get();
assertThat(cbiAddressBeforeHintRemoval.isRedacted()).isFalse();
var cbiAddressBeforeHintRemoval = entityLog.getEntityLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get();
assertThat(cbiAddressBeforeHintRemoval.getState().equals(EntryState.APPLIED)).isFalse();
var cbiAddressAfterHintRemoval = mergedRedactionLog.getRedactionLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get();
assertThat(cbiAddressAfterHintRemoval.isRedacted()).isTrue();
var cbiAddressAfterHintRemoval = mergedEntityLog.getEntityLogEntry().stream().filter(re -> re.getType().equalsIgnoreCase("CBI_Address")).findAny().get();
assertThat(cbiAddressAfterHintRemoval.getState().equals(EntryState.APPLIED)).isTrue();
}
@ -303,11 +306,11 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("analysis analysis duration: " + (System.currentTimeMillis() - fstart));
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
Map<String, List<EntityLogEntry>> duplicates = new HashMap<>();
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
entityLog.getEntityLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
@ -352,7 +355,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var documentGraph = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
long end = System.currentTimeMillis();
@ -365,15 +368,15 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
int correctFound = 0;
loop:
for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
for (EntityLogEntry entityLogEntry : entityLog.getEntityLogEntry()) {
for (Section section : documentGraph.getMainSections()) {
if (redactionLogEntry.isImage()) {
if (entityLogEntry.getEntryType().equals(EntryType.IMAGE)) {
correctFound++;
continue loop;
}
if (redactionLogEntry.getSectionNumber() == section.getTreeId().get(0)) {
String value = section.getTextBlock().subSequence(new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset())).toString();
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
if (Objects.equals(entityLogEntry.getContainingNodeId().get(0), section.getTreeId().get(0))) {
String value = section.getTextBlock().subSequence(new TextRange(entityLogEntry.getStartOffset(), entityLogEntry.getEndOffset())).toString();
if (entityLogEntry.getValue().equalsIgnoreCase(value)) {
correctFound++;
} else {
throw new RuntimeException("WTF");
@ -414,7 +417,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
@ -434,7 +437,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
System.out.println("Output file:" + outputFileName);
}
@ -468,9 +471,9 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
mockDictionaryCalls(3L);
analyzeService.reanalyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var changes = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getValue() != null && entry.getValue().equals("report")).findFirst().get().getChanges();
var changes = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getValue() != null && entry.getValue().equals("report")).findFirst().get().getChanges();
assertThat(changes.size()).isEqualTo(2);
@ -507,7 +510,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var documentGraph = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID));
long end = System.currentTimeMillis();
@ -518,13 +521,12 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getDocumentData(TEST_DOSSIER_ID, TEST_FILE_ID)));
}
List<String> valuesInDocument = redactionLog.getRedactionLogEntry()
.stream()
.filter(e -> !e.isImage())
List<String> valuesInDocument = entityLog.getEntityLogEntry()
.stream().filter(e -> !e.getEntryType().equals(EntryType.IMAGE))
.map(redactionLogEntry -> new TextRange(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset()))
.map(boundary -> documentGraph.getTextBlock().subSequence(boundary).toString())
.toList();
List<String> valuesInRedactionLog = redactionLog.getRedactionLogEntry().stream().filter(e -> !e.isImage()).map(RedactionLogEntry::getValue).toList();
List<String> valuesInRedactionLog = entityLog.getEntityLogEntry().stream().filter(e -> !e.getEntryType().equals(EntryType.IMAGE)).map(EntityLogEntry::getValue).toList();
assertEquals(valuesInRedactionLog, valuesInDocument);
@ -562,7 +564,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
@ -583,7 +585,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
System.out.println("Output file:" + outputFileName);
}
@ -810,8 +812,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(5);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertThat(entityLog.getEntityLogEntry().size()).isEqualTo(5);
long end = System.currentTimeMillis();
@ -857,7 +859,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
.manualRedactions(manualRedactions)
@ -885,10 +887,10 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
if (!entry.isHint()) {
redactionLog.getEntityLogEntry().forEach(entry -> {
if (!entry.getEntryType().equals(EntryType.HINT)) {
assertThat(entry.getReason()).isEqualTo("Not redacted because it's row does not belong to a vertebrate study");
}
});
@ -1048,7 +1050,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
@ -1056,7 +1058,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
redactionLog.getRedactionLogEntry().forEach(entry -> {
entityLog.getEntityLogEntry().forEach(entry -> {
if (entry.getValue() == null) {
return;
}
@ -1094,8 +1096,8 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var values = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).collect(Collectors.toList());
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var values = entityLog.getEntityLogEntry().stream().map(EntityLogEntry::getValue).collect(Collectors.toList());
assertThat(values).contains("Mrs. Robinson");
assertThat(values).contains("Mr. Bojangles");

View File

@ -22,9 +22,11 @@ import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
@ -120,22 +122,20 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1);
assertThat(entityLog.getEntityLogEntry().size()).isEqualTo(1);
RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0);
EntityLogEntry redactionLogEntry = entityLog.getEntityLogEntry().get(0);
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorAndPIIDictionary);
assertThat(redactionLogEntry.isRedacted()).isEqualTo(true);
assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false);
assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false);
assertThat(redactionLogEntry.getState()).isEqualTo(EntryState.APPLIED);
assertThat(redactionLogEntry.isExcluded()).isEqualTo(false);
assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true);
assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1);
assertThat(redactionLogEntry.getEngines().contains(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine.DICTIONARY)).isEqualTo(true);
assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true);
}
@ -158,22 +158,20 @@ public class RedactionIntegrationV2Test extends AbstractRedactionIntegrationTest
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertThat(redactionLog.getRedactionLogEntry().size()).isEqualTo(1);
assertThat(redactionLog.getEntityLogEntry().size()).isEqualTo(1);
RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().get(0);
EntityLogEntry redactionLogEntry = redactionLog.getEntityLogEntry().get(0);
assertThat(redactionLogEntry.getType()).isEqualTo(DICTIONARY_AUTHOR);
assertThat(redactionLogEntry.getValue()).isEqualTo(entryAuthorDictionary);
assertThat(redactionLogEntry.isRedacted()).isEqualTo(true);
assertThat(redactionLogEntry.isRecommendation()).isEqualTo(false);
assertThat(redactionLogEntry.isFalsePositive()).isEqualTo(false);
assertThat(redactionLogEntry.getState()).isEqualTo(EntryState.APPLIED);
assertThat(redactionLogEntry.isExcluded()).isEqualTo(false);
assertThat(redactionLogEntry.isDictionaryEntry()).isEqualTo(true);
assertThat(redactionLogEntry.getEngines().size()).isEqualTo(1);
assertThat(redactionLogEntry.getEngines().contains(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine.DICTIONARY)).isEqualTo(true);
assertThat(redactionLogEntry.getEngines().contains(Engine.DICTIONARY)).isEqualTo(true);
}

View File

@ -1,11 +1,9 @@
package com.iqser.red.service.redaction.v1.server.annotate;
import java.awt.Color;
import java.awt.geom.Rectangle2D;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -15,26 +13,20 @@ import java.util.stream.Stream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationHighlight;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
import org.springframework.stereotype.Service;
import com.google.common.primitives.Floats;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogComment;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.CellRectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionRectangle;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
@ -67,17 +59,15 @@ public class AnnotationService {
public AnnotateResponse annotate(AnnotateRequest annotateRequest) {
var storedObjectFile = redactionStorageService.getStoredObjectFile(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(),
annotateRequest.getFileId(),
FileType.ORIGIN));
annotateRequest.getFileId(), FileType.VIEWER_DOCUMENT));
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getDossierId(), annotateRequest.getFileId());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId());
var entityLog = redactionStorageService.getEntityLog(annotateRequest.getDossierId(), annotateRequest.getFileId());
try (PDDocument pdDocument = Loader.loadPDF(storedObjectFile)) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
annotate(pdDocument, redactionLog, sectionsGrid);
annotate(pdDocument, entityLog);
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
pdDocument.save(byteArrayOutputStream);
@ -90,20 +80,15 @@ public class AnnotationService {
}
private void annotate(PDDocument document, RedactionLog redactionLog, SectionGrid sectionGrid) throws IOException {
private void annotate(PDDocument document, EntityLog entityLog) throws IOException {
Map<Integer, List<RedactionLogEntry>> redactionLogPerPage = convertRedactionLog(redactionLog);
Map<Integer, List<EntityLogEntry>> entityLogPerPage = groupingByPageNumber(entityLog);
for (int page = 1; page <= document.getNumberOfPages(); page++) {
PDPage pdPage = document.getPage(page - 1);
List<SectionRectangle> sectionRectangles = sectionGrid.getRectanglesPerPage().get(page);
if (sectionRectangles != null && !sectionRectangles.isEmpty()) {
drawSectionGrid(document, pdPage, sectionRectangles);
}
List<RedactionLogEntry> logEntries = redactionLogPerPage.get(page);
List<EntityLogEntry> logEntries = entityLogPerPage.get(page);
if (logEntries != null && !logEntries.isEmpty()) {
addAnnotations(logEntries, pdPage, page);
}
@ -111,19 +96,67 @@ public class AnnotationService {
}
private void addAnnotations(List<RedactionLogEntry> logEntries, PDPage pdPage, int page) throws IOException {
/**
 * Groups the entries of an entity log by the page numbers of their positions.
 *
 * <p>An entry is registered exactly once for every distinct page number found among its positions,
 * regardless of the order in which those positions are listed. Entries without positions do not
 * appear in the result.
 *
 * @param entityLog the entity log to group; may be {@code null}
 * @return a mutable map from page number to the entries that have at least one position on that
 *         page; empty when {@code entityLog} is {@code null}
 */
private Map<Integer, List<EntityLogEntry>> groupingByPageNumber(EntityLog entityLog) {

    Map<Integer, List<EntityLogEntry>> entityLogPerPage = new HashMap<>();
    if (entityLog == null) {
        return entityLogPerPage;
    }
    for (EntityLogEntry entry : entityLog.getEntityLogEntry()) {
        // De-duplicate over ALL positions of the entry, not just neighbouring ones: comparing only
        // with the previous position's page would register the entry twice for a page when its
        // positions are not sorted by page (e.g. 1, 2, 1), producing duplicate annotations later on.
        entry.getPositions()
                .stream()
                .map(Position::getPageNumber)
                .distinct()
                .forEach(pageNumber -> entityLogPerPage.computeIfAbsent(pageNumber, x -> new ArrayList<>()).add(entry));
    }
    return entityLogPerPage;
}
private void addAnnotations(List<EntityLogEntry> logEntries, PDPage pdPage, int page) throws IOException {
List<PDAnnotation> annotations = pdPage.getAnnotations();
for (RedactionLogEntry entry : logEntries) {
if (entry.lastChangeIsRemoved()) {
for (EntityLogEntry entry : logEntries) {
if (entry.getState().equals(EntryState.REMOVED)) {
continue;
}
annotations.addAll(createAnnotation(entry, page, pdPage.getRotation(), pdPage.getCropBox()));
annotations.addAll(createAnnotation(entry, page));
}
}
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {
/**
 * Builds the highlight annotation for a single entity log entry on a single page.
 *
 * <p>The highlight covers the bounding box of all positions the entry has on {@code page} and
 * carries one quad per position. Its title and annotation name are the entry id and its color is
 * the entry color. A textual description (value plus annotation content) is attached unless the
 * entry has no entry type, is a hint, or is in state {@code IGNORED}.
 *
 * @param entityLogEntry the entry to visualise
 * @param page           the page number whose positions should be highlighted
 * @return a list holding the single highlight annotation, or an empty list when the entry has no
 *         position on {@code page}
 */
private List<PDAnnotation> createAnnotation(EntityLogEntry entityLogEntry, int page) {

    List<PDAnnotation> result = new ArrayList<>();

    List<Position> positionsOnPage = entityLogEntry.getPositions()
            .stream()
            .filter(position -> position.getPageNumber() == page)
            .collect(Collectors.toList());
    if (positionsOnPage.isEmpty()) {
        return result;
    }

    PDAnnotationHighlight highlight = new PDAnnotationHighlight();
    // NOTE(review): constructAppearances() runs before rectangle, quad points and color are set;
    // the order is kept as-is here — confirm it is intended.
    highlight.constructAppearances();
    highlight.setRectangle(toPDRectangleBBox(positionsOnPage));
    highlight.setQuadPoints(Floats.toArray(toQuadPoints(positionsOnPage)));

    // Only typed, non-hint, non-ignored entries get a descriptive text.
    EntryType entryType = entityLogEntry.getEntryType();
    boolean hasDescription = entryType != null && !entryType.equals(EntryType.HINT) && !entityLogEntry.getState().equals(EntryState.IGNORED);
    if (hasDescription) {
        highlight.setContents(entityLogEntry.getValue() + " " + createAnnotationContent(entityLogEntry));
    }

    highlight.setTitlePopup(entityLogEntry.getId());
    highlight.setAnnotationName(entityLogEntry.getId());
    highlight.setColor(new PDColor(entityLogEntry.getColor(), PDDeviceRGB.INSTANCE));
    highlight.setNoRotate(false);

    result.add(highlight);
    return result;
}
public static PDRectangle toPDRectangleBBox(List<Position> rectangles) {
Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles);
@ -135,69 +168,12 @@ public class AnnotationService {
return annotationPosition;
}
private List<PDAnnotation> createAnnotation(RedactionLogEntry redactionLogEntry, int page, int rotation, PDRectangle cropBox) {
List<PDAnnotation> annotations = new ArrayList<>();
public static List<Double> toQuadPoints(List<Position> rectangles) {
List<Rectangle> rectangles = redactionLogEntry.getPositions().stream().filter(pos -> pos.getPage() == page).collect(Collectors.toList());
if (rectangles.isEmpty()) {
return annotations;
}
PDAnnotationHighlight annotation = new PDAnnotationHighlight();
annotation.constructAppearances();
PDRectangle pdRectangle = toPDRectangleBBox(rectangles);
annotation.setRectangle(pdRectangle);
annotation.setQuadPoints(Floats.toArray(toQuadPoints(rectangles)));
if (!redactionLogEntry.isHint()) {
annotation.setContents(redactionLogEntry.getValue() + " " + createAnnotationContent(redactionLogEntry));
}
annotation.setTitlePopup(redactionLogEntry.getId());
annotation.setAnnotationName(redactionLogEntry.getId());
annotation.setColor(new PDColor(redactionLogEntry.getColor(), PDDeviceRGB.INSTANCE));
annotation.setNoRotate(false);
annotations.add(annotation);
if (redactionLogEntry.getComments() != null) {
for (RedactionLogComment comment : redactionLogEntry.getComments()) {
PDAnnotationText txtAnnot = new PDAnnotationText();
txtAnnot.setAnnotationName(String.valueOf(comment.getId()));
txtAnnot.setInReplyTo(annotation); // Reference to highlight annotation
txtAnnot.setName(PDAnnotationText.NAME_COMMENT);
txtAnnot.setCreationDate(GregorianCalendar.from(comment.getDate().toZonedDateTime()));
txtAnnot.setTitlePopup(comment.getUser());
txtAnnot.setContents(comment.getText());
txtAnnot.setRectangle(pdRectangle);
annotations.add(txtAnnot);
}
}
return annotations;
return rectangles.stream().map(Position::toRectangle2D).flatMap(AnnotationService::toQuadPoints).toList();
}
private String createAnnotationContent(RedactionLogEntry redactionLogEntry) {
return redactionLogEntry.getType() + " \nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n" + redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry.getLegalBasis() + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\"";
}
public static List<Double> toQuadPoints(List<Rectangle> rectangles) {
return rectangles.stream().map(AnnotationService::toRectangle2D).flatMap(AnnotationService::toQuadPoints).toList();
}
private static Rectangle2D toRectangle2D(Rectangle redactionLogRectangle) {
return new Rectangle2D.Double(redactionLogRectangle.getTopLeft().getX(),
redactionLogRectangle.getTopLeft().getY(),
redactionLogRectangle.getWidth(),
redactionLogRectangle.getHeight());
}
public static Stream<Double> toQuadPoints(Rectangle2D rectangle) {
double x1 = rectangle.getMinX();
@ -215,71 +191,9 @@ public class AnnotationService {
}
private void drawSectionGrid(PDDocument document, PDPage pdPage, List<SectionRectangle> sectionRectangles) throws IOException {
private String createAnnotationContent(EntityLogEntry redactionLogEntry) {
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
for (SectionRectangle sectionRectangle : sectionRectangles) {
drawSectionRectangle(contentStream, sectionRectangle);
drawSectionPartNumberText(contentStream, sectionRectangle);
drawTableCells(contentStream, sectionRectangle);
}
contentStream.close();
}
private void drawSectionRectangle(PDPageContentStream contentStream, SectionRectangle sectionRectangle) throws IOException {
contentStream.setStrokingColor(Color.LIGHT_GRAY);
contentStream.setLineWidth(0.5f);
contentStream.addRect(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft().getY(), sectionRectangle.getWidth(), sectionRectangle.getHeight());
contentStream.stroke();
}
private void drawSectionPartNumberText(PDPageContentStream contentStream, SectionRectangle sectionRectangle) throws IOException {
contentStream.beginText();
contentStream.setNonStrokingColor(Color.DARK_GRAY);
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN), 8f);
if (sectionRectangle.getTableCells() == null) {
contentStream.newLineAtOffset(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft().getY() + sectionRectangle.getHeight());
} else {
contentStream.newLineAtOffset(sectionRectangle.getTopLeft().getX(), sectionRectangle.getTopLeft().getY());
}
contentStream.showText(sectionRectangle.getPart() + "/" + sectionRectangle.getNumberOfParts());
contentStream.endText();
}
private void drawTableCells(PDPageContentStream contentStream, SectionRectangle sectionRectangle) throws IOException {
if (sectionRectangle.getTableCells() != null) {
for (CellRectangle cell : sectionRectangle.getTableCells()) {
contentStream.setLineWidth(0.5f);
contentStream.setStrokingColor(Color.CYAN);
contentStream.addRect(cell.getTopLeft().getX(), cell.getTopLeft().getY(), cell.getWidth(), cell.getHeight());
contentStream.stroke();
}
}
}
private Map<Integer, List<RedactionLogEntry>> convertRedactionLog(RedactionLog redactionLog) {
Map<Integer, List<RedactionLogEntry>> redactionLogPerPage = new HashMap<>();
if (redactionLog == null) {
return redactionLogPerPage;
}
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
int page = 0;
for (Rectangle position : entry.getPositions()) {
if (position.getPage() != page) {
redactionLogPerPage.computeIfAbsent(position.getPage(), x -> new ArrayList<>()).add(entry);
page = position.getPage();
}
}
}
return redactionLogPerPage;
return redactionLogEntry.getType() + " \nRule " + redactionLogEntry.getMatchedRule() + " matched\n\n" + redactionLogEntry.getReason() + "\n\nLegal basis:" + redactionLogEntry.getLegalBasis() + "\n\nIn section: \"" + redactionLogEntry.getSection() + "\"";
}
}

View File

@ -29,4 +29,4 @@ J.B. RASCLE
青森植
サンプル量
供試試料 (無処理 区)
材料
材料