RED-3300 Improve impurity rule
This commit is contained in:
parent
db59ae014b
commit
62ec63cc55
@ -23,6 +23,7 @@ import java.time.ZoneOffset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
@ -2352,6 +2353,20 @@ public class RedactionIntegrationTest extends RulesIntegrationTest {
|
||||
assertEquals(entityLog.getEntityLogEntry().size(), 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPurityRule() {
|
||||
String EFSA_SANITISATION_RULES = loadFromClassPath("drools/efsa_sanitisation.drl");
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(EFSA_SANITISATION_RULES));
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
|
||||
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
|
||||
analyzeService.analyze(request);
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var entriesCount = entityLog.getEntityLogEntry().stream().filter(e -> e.getValue().toLowerCase(Locale.ENGLISH).startsWith("purity")).collect(Collectors.toList()).size();
|
||||
assertEquals(7, entriesCount);
|
||||
}
|
||||
|
||||
|
||||
private IdRemoval getIdRemoval(String id) {
|
||||
|
||||
|
||||
@ -109,4 +109,41 @@ public class RegExPatternTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPurity() {
|
||||
String text = "purity: 100% -> ok\n"
|
||||
+ "purity: <100% -> ok\n"
|
||||
+ "purity: 9% -> ok\n"
|
||||
+ "purity: <200% -> not ok\n"
|
||||
+ "purity 45%aa -> not ok\n"
|
||||
+ "purity: <45% -> ok\n"
|
||||
+ "purity: >45% -> ok\n"
|
||||
+ "purity: 101% -> not ok\n"
|
||||
+ "purity: 99.9% -> ok\n"
|
||||
+ "purity: 99,9% -> ok\n"
|
||||
+ "purity: 99,90% -> ok\n"
|
||||
+ "purity: aa 45% -> not ok\n"
|
||||
+ "purity: 99% -> ok\n"
|
||||
+ "purity: 99.99% -> ok\n"
|
||||
+ "purity: 100.00% -> ok?\n"
|
||||
+ "purity: <=45% -> not ok\n"
|
||||
+ "purity: >=45% -> not ok\n"
|
||||
+ "purity: <>45% -> not ok\n"
|
||||
+ "purity: =<45% -> not ok\n"
|
||||
+ "purity: =>45% -> not ok\n"
|
||||
+ "purity: aa45% -> not ok\n"
|
||||
+ "purity: 045% -> not ok\n"
|
||||
+ "purity: .45% -> not ok \n"
|
||||
+ "purity: 1000% -> not ok";
|
||||
String text2 = "Rule 39: Purity Hint Add Purity as Hint when Percent-Numbers is there Test Item: Soda Purity: 45% ← should be Hint Purity: <45% ← should be Hint Purity: >45% ← should be Hint Purity: 101% ← should ne be Hint because >100 % is not possible Purity: =>45% ← should be not Hint because additional symbols Purity: =<45% ← should be not Hint because additional symbols Purity: aa 45% ← should be not Hint because additional symbols Purity: 45% aa ← should be not Hint because additional symbols Purity: aa45% ← should be not Hint because additional symbols Purity: 45%aa ← should be not Hint because additional symbols Product-Code: EAK-L443 purity: 99% ← not Hint because case sensitive purity: >99% ← not Hint because case sensitive purity: <99% ← not Hint because case sensitive Supplier: GreenForce ";
|
||||
Pattern p = Pattern.compile("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = p.matcher(text);
|
||||
while (matcher.find()) {
|
||||
String match = matcher.group(0);
|
||||
String match1 = matcher.group(1);
|
||||
System.out.println("Group 0: " + match);
|
||||
System.out.println("Group 1: " + match1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -909,7 +909,7 @@ rule "ETC.0.0: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
|
||||
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
|
||||
end
|
||||
|
||||
|
||||
@ -1497,7 +1497,7 @@ rule "ETC.0.0: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
|
||||
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
|
||||
end
|
||||
|
||||
|
||||
@ -646,7 +646,7 @@ rule "ETC.0.0: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
|
||||
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
|
||||
end
|
||||
|
||||
|
||||
@ -627,7 +627,7 @@ rule "ETC.0.0: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
|
||||
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
|
||||
end
|
||||
|
||||
|
||||
@ -1514,7 +1514,7 @@ rule "ETC.0.0: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.HINT, 1, $section)
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) [<>]{0,1}(100|([1-9]{1}[0-9]{0,1}([.,]{1}[0-9]{1,2})?)) ?% ", "hint_only", EntityType.HINT, 1, $section)
|
||||
.forEach(hint -> hint.skip("ETC.0.0", "hint only"));
|
||||
end
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user