RED-3300 Improve impurity rule

This commit is contained in:
Corina Olariu 2024-11-26 13:10:14 +01:00
parent db59ae014b
commit 62ec63cc55
8 changed files with 58 additions and 6 deletions

View File

@ -23,6 +23,7 @@ import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
@ -2352,6 +2353,20 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
assertEquals(entityLog.getEntityLogEntry().size(), 3);
}
/**
 * Analyses the crafted purity document with the EFSA sanitisation rules and checks how many
 * entity-log entries have a value starting with "purity" (compared case-insensitively).
 */
@Test
public void testPurityRule() {

    // Serve the EFSA sanitisation rule file whenever the entity rules of the test template are requested.
    String efsaSanitisationRules = loadFromClassPath("drools/efsa_sanitisation.drl");
    when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(efsaSanitisationRules));

    AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
    analyzeService.analyze(request);

    var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    // Count directly on the stream instead of materialising a list; entries without a value are ignored
    // rather than failing the test with a NullPointerException.
    long purityEntryCount = entityLog.getEntityLogEntry()
            .stream()
            .filter(entry -> entry.getValue() != null && entry.getValue().toLowerCase(Locale.ENGLISH).startsWith("purity"))
            .count();

    assertEquals(7L, purityEntryCount);
}
private IdRemoval getIdRemoval(String id) {

View File

@ -109,4 +109,41 @@ public class RegExPatternTest {
}
}
/**
 * Verifies the purity-hint pattern against two hand-written samples.
 *
 * <p>{@code text} holds one candidate per line; exactly the ten lines marked "-> ok" are expected to
 * match. {@code text2} is a flattened sample document in which seven purity statements satisfy the
 * pattern. In every match, capture group 1 must be the label "purity:" (in any letter case).
 */
@Test
public void testPurity() {

    String text = "purity: 100% -> ok\n"
            + "purity: <100% -> ok\n"
            + "purity: 9% -> ok\n"
            + "purity: <200% -> not ok\n"
            + "purity 45%aa -> not ok\n"
            + "purity: <45% -> ok\n"
            + "purity: >45% -> ok\n"
            + "purity: 101% -> not ok\n"
            + "purity: 99.9% -> ok\n"
            + "purity: 99,9% -> ok\n"
            + "purity: 99,90% -> ok\n"
            + "purity: aa 45% -> not ok\n"
            + "purity: 99% -> ok\n"
            + "purity: 99.99% -> ok\n"
            + "purity: 100.00% -> ok?\n"
            + "purity: <=45% -> not ok\n"
            + "purity: >=45% -> not ok\n"
            + "purity: <>45% -> not ok\n"
            + "purity: =<45% -> not ok\n"
            + "purity: =>45% -> not ok\n"
            + "purity: aa45% -> not ok\n"
            + "purity: 045% -> not ok\n"
            + "purity: .45% -> not ok \n"
            + "purity: 1000% -> not ok";
    String text2 = "Rule 39: Purity Hint Add Purity as Hint when Percent-Numbers is there Test Item: Soda Purity: 45% ← should be Hint Purity: <45% ← should be Hint Purity: >45% ← should be Hint Purity: 101% ← should ne be Hint because >100 % is not possible Purity: =>45% ← should be not Hint because additional symbols Purity: =<45% ← should be not Hint because additional symbols Purity: aa 45% ← should be not Hint because additional symbols Purity: 45% aa ← should be not Hint because additional symbols Purity: aa45% ← should be not Hint because additional symbols Purity: 45%aa ← should be not Hint because additional symbols Product-Code: EAK-L443 purity: 99% ← not Hint because case sensitive purity: >99% ← not Hint because case sensitive purity: <99% ← not Hint because case sensitive Supplier: GreenForce ";

    // Optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two decimals,
    // an optional space, '%', and a mandatory trailing space.
    Pattern p = Pattern.compile("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", Pattern.CASE_INSENSITIVE);

    // 100, <100, 9, <45, >45, 99.9, 99,9, 99,90, 99 and 99.99 match; "100.00%" does not.
    assertPurityMatchCount(p, text, 10);
    // 45, <45, >45, "45% aa" (only the part up to the trailing space), 99, >99 and <99 match.
    assertPurityMatchCount(p, text2, 7);
}

/**
 * Fails with an {@link AssertionError} unless {@code pattern} is found exactly {@code expectedCount}
 * times in {@code input} and capture group 1 of every match equals "purity:" ignoring case.
 */
private static void assertPurityMatchCount(Pattern pattern, String input, int expectedCount) {

    Matcher matcher = pattern.matcher(input);
    int found = 0;
    while (matcher.find()) {
        String label = matcher.group(1);
        if (!"purity:".equalsIgnoreCase(label)) {
            throw new AssertionError("Unexpected label <" + label + "> in match <" + matcher.group(0) + ">");
        }
        found++;
    }
    if (found != expectedCount) {
        throw new AssertionError("Expected " + expectedCount + " purity matches but found " + found);
    }
}
}

View File

@ -909,7 +909,7 @@ rule "ETC.0.0: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
// Creates entities of type string "hint_only" / EntityType.HINT from regex matches in $section and
// calls skip("ETC.0.0", "hint only") on each of them.
// NOTE(review): this hunk lists two byRegexIgnoreCase calls back to back; presumably the first is the
// replaced line and the second its successor - confirm against the actual .drl file.
// First pattern: after the purity label, up to five arbitrary characters, a run of digits/dots, then '%'.
// Second pattern: optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two
// decimals, an optional space, '%', and a mandatory trailing space (so "45%aa" or a '%' at the very
// end of the text does not match).
// The argument 1 presumably selects capture group 1 (the "purity..." label) - TODO confirm.
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
end

View File

@ -1497,7 +1497,7 @@ rule "ETC.0.0: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
// Creates entities of type string "hint_only" / EntityType.HINT from regex matches in $section and
// calls skip("ETC.0.0", "hint only") on each of them.
// NOTE(review): this hunk lists two byRegexIgnoreCase calls back to back; presumably the first is the
// replaced line and the second its successor - confirm against the actual .drl file.
// First pattern: after the purity label, up to five arbitrary characters, a run of digits/dots, then '%'.
// Second pattern: optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two
// decimals, an optional space, '%', and a mandatory trailing space (so "45%aa" or a '%' at the very
// end of the text does not match).
// The argument 1 presumably selects capture group 1 (the "purity..." label) - TODO confirm.
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
end

View File

@ -646,7 +646,7 @@ rule "ETC.0.0: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
// Creates entities of type string "hint_only" / EntityType.HINT from regex matches in $section and
// calls skip("ETC.0.0", "hint only") on each of them.
// NOTE(review): this hunk lists two byRegexIgnoreCase calls back to back; presumably the first is the
// replaced line and the second its successor - confirm against the actual .drl file.
// First pattern: after the purity label, up to five arbitrary characters, a run of digits/dots, then '%'.
// Second pattern: optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two
// decimals, an optional space, '%', and a mandatory trailing space (so "45%aa" or a '%' at the very
// end of the text does not match).
// The argument 1 presumably selects capture group 1 (the "purity..." label) - TODO confirm.
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
end

View File

@ -627,7 +627,7 @@ rule "ETC.0.0: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
// Creates entities of type string "hint_only" / EntityType.HINT from regex matches in $section and
// calls skip("ETC.0.0", "hint only") on each of them.
// NOTE(review): this hunk lists two byRegexIgnoreCase calls back to back; presumably the first is the
// replaced line and the second its successor - confirm against the actual .drl file.
// First pattern: after the purity label, up to five arbitrary characters, a run of digits/dots, then '%'.
// Second pattern: optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two
// decimals, an optional space, '%', and a mandatory trailing space (so "45%aa" or a '%' at the very
// end of the text does not match).
// The argument 1 presumably selects capture group 1 (the "purity..." label) - TODO confirm.
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
end

View File

@ -1514,7 +1514,7 @@ rule "ETC.0.0: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
// Creates entities of type string "hint_only" / EntityType.HINT from regex matches in $section and
// calls skip("ETC.0.0", "hint only") on each of them.
// NOTE(review): this hunk lists two byRegexIgnoreCase calls back to back; presumably the first is the
// replaced line and the second its successor - confirm against the actual .drl file.
// First pattern: after the purity label, up to five arbitrary characters, a run of digits/dots, then '%'.
// Second pattern: optional single '<' or '>', then 100 or 1-99 with an optional '.'/',' and one or two
// decimals, an optional space, '%', and a mandatory trailing space (so "45%aa" or a '%' at the very
// end of the text does not match).
// The argument 1 presumably selects capture group 1 (the "purity..." label) - TODO confirm.
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
end