Merge branch 'RED-8481-rules' into 'master'

RED-8481: use visual layout parser for signature extraction

Closes RED-8481

See merge request redactmanager/redaction-service!290
This commit is contained in:
Yannik Hampe 2024-02-28 09:04:55 +01:00
commit 23b8d164d2
12 changed files with 255 additions and 8 deletions

View File

@ -35,6 +35,7 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Image implements GenericSemanticNode, IEntity {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
@ -73,7 +74,9 @@ public class Image implements GenericSemanticNode, IEntity {
@Override
public TextBlock getTextBlock() {
return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
return streamAllSubNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
@ -94,7 +97,8 @@ public class Image implements GenericSemanticNode, IEntity {
@Override
public String type() {
return getManualOverwrite().getType().orElse(imageType.toString());
return getManualOverwrite().getType()
.orElse(imageType.toString());
}
@ -126,15 +130,29 @@ public class Image implements GenericSemanticNode, IEntity {
return name.charAt(0) + name.substring(1).toLowerCase(Locale.ENGLISH);
}
public boolean mostlyContainedBy(Image image) {
Map<Page,Rectangle2D> bboxImage = image.getBBox();
Map<Page,Rectangle2D> bbox = this.getBBox();
public boolean mostlyContainedBy(Image image, double containmentThreshold) {
Map<Page, Rectangle2D> bboxImage = image.getBBox();
Map<Page, Rectangle2D> bbox = this.getBBox();
Rectangle2D intersection = bboxImage.get(this.page).createIntersection(bbox.get(this.page));
double calculatedIntersection = intersection.getWidth() * intersection.getHeight();
double area = bbox.get(this.page).getWidth() * bbox.get(this.page).getHeight();
return (calculatedIntersection / area) > 0.8;
return (calculatedIntersection / area) > containmentThreshold;
}
/**
 * Checks whether this image covers most of the given image.
 *
 * <p>Both bounding boxes are looked up on this image's page. The area of their overlap is divided by the area of
 * the given image's bounding box and the resulting fraction is compared to the threshold. This is the mirror of
 * {@code mostlyContainedBy}, which divides the overlap by this image's own area.
 *
 * @param image                the image that is tested for being covered by this image
 * @param containmentThreshold fraction of the given image's area that must be covered (exclusive lower bound)
 * @return {@code true} if the covered fraction is greater than {@code containmentThreshold}; {@code false} if
 *         either image has no bounding box on this image's page, if the boxes do not overlap, or if the given
 *         image's bounding box has no area
 */
public boolean mostlyContains(Image image, double containmentThreshold) {

    Rectangle2D ownBox = this.getBBox().get(this.page);
    Rectangle2D otherBox = image.getBBox().get(this.page);
    if (ownBox == null || otherBox == null) {
        // An image without a bounding box on this page cannot be covered by this image.
        return false;
    }

    Rectangle2D intersection = ownBox.createIntersection(otherBox);
    if (intersection.isEmpty()) {
        // Disjoint boxes yield a negative width and/or height; multiplying two negative
        // dimensions would otherwise produce a bogus positive overlap area.
        return false;
    }

    double otherArea = otherBox.getWidth() * otherBox.getHeight();
    if (otherArea <= 0) {
        return false;
    }

    double intersectionArea = intersection.getWidth() * intersection.getHeight();
    return (intersectionArea / otherArea) > containmentThreshold;
}
public int length() {
return 0;

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1013,6 +1015,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1576,6 +1578,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1414,6 +1416,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -255,6 +257,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1166,6 +1168,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -379,6 +381,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -500,6 +502,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -416,6 +418,27 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1

View File

@ -33,7 +33,11 @@ public class RuleFileMigrator {
rulesToAdd.forEach(ruleFileBluePrint::addRule);
}
String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
RuleFileBluePrint newBluePrint = new RuleFileBluePrint(combinedBluePrint.imports(),
combinedBluePrint.globals(),
combinedBluePrint.queries(),
ruleFileBluePrint.ruleClasses());
String migratedRulesString = RuleFileFactory.buildRuleString(newBluePrint);
String migratedFilePath = ruleFile.getAbsolutePath();
try (var out = new FileOutputStream(migratedFilePath)) {
out.write(migratedRulesString.getBytes(StandardCharsets.UTF_8));

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1597,6 +1599,24 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
retract($other);
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------

View File

@ -54,6 +54,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
@ -1299,7 +1301,6 @@ rule "TAB.7.0: Indicator (Species)"
.ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found"));
end
//------------------------------------ Manual redaction rules ------------------------------------
// Rule unit: MAN.0
@ -1561,6 +1562,26 @@ rule "X.8.1: Remove Entity when intersected by imported Entity"
retract($other);
end
// Rule unit: X.9
// Fires for every pair of an AI-engine signature image and an ALGORITHM-engine signature image for which
// $signature.mostlyContains($aiSignature, 0.8) evaluates to true.
// NOTE(review): presumably mostlyContains measures the overlap relative to the AI signature's area -- confirm.
// NOTE(review): the removal reason misspells "algorithm"; it is a runtime string and is left unchanged here.
rule "X.9.0: Merge mostly contained signatures"
when
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI)
$signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8))
then
// Remove the AI image (recording rule id and reason) and add the AI engine to the algorithm image's engines.
$aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature");
$signature.addEngine(LayoutEngine.AI);
end
// Rule unit: X.10
// Removes AI-engine signature images that fail the 0.8 containment test against an ALGORITHM-engine image.
// NOTE(review): the two patterns are joined, so this fires once per ($anyImage, $aiSignature) pair in which the
// signature is not mostly contained by that particular $anyImage. A single such algorithm image is enough to
// remove the signature, and nothing fires when no ALGORITHM image exists. If "not contained by any algorithm
// image" was intended, a `not Image(...)` pattern would be needed -- confirm.
rule "X.10.0: remove false positives of ai"
when
$anyImage: Image(engines contains LayoutEngine.ALGORITHM)
$aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8))
then
// Remove the AI signature, recording rule id and reason.
$aiSignature.remove("X.10.0", "Removed because false positive");
end
//------------------------------------ File attributes rules ------------------------------------