diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
index dc14e5a8..e0ec1bb8 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java
@@ -80,6 +80,33 @@ public class Section {
     }
 
 
+    /**
+     * Marks entities of the given type as redacted when {@code searchText} contains the
+     * entity's word directly after {@code prefix}.
+     * <p>
+     * The check is a case-sensitive search for {@code prefix + word} anywhere in
+     * {@code searchText}. No separator is inserted between the two, so {@code prefix}
+     * must include any whitespace that separates it from the word.
+     *
+     * @param prefix     text expected immediately in front of the entity word
+     * @param type       only entities whose type equals this value are considered
+     * @param ruleNumber rule number stored on each redacted entity
+     * @param reason     redaction reason stored on each redacted entity
+     */
+    public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason) {
+
+        entities.forEach(entity -> {
+            if (entity.getType().equals(type)) {
+                if (searchText.indexOf(prefix + entity.getWord()) != -1) { // indexOf returns -1 when absent
+                    entity.setRedaction(true);
+                    entity.setMatchedRule(ruleNumber);
+                    entity.setRedactionReason(reason);
+                }
+            }
+        });
+    }
+
+
     public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
 
         String[] values = StringUtils.substringsBetween(text, start, "\n");
@@ -141,7 +168,8 @@ public class Section {
 
                 if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText
                         .charAt(startIndex - 1))) && (stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
-                    found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex, headline, sectionNumber));
+                    found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex,
+                            headline, sectionNumber));
                 }
             } while (startIndex > -1);
 
@@ -197,7 +225,8 @@ public class Section {
         if (value == null) {
             log.warn("Could not find any data for {}.", cellHeader);
         } else {
-            Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(), value.getRowSpanStart() + value.toString()
+            Entity entity = new Entity(value.toString(), type, value.getRowSpanStart(),
+                    value.getRowSpanStart() + value.toString()
                     .length(), headline, sectionNumber);
             entity.setRedaction(redact);
             entity.setMatchedRule(ruleNumber);
@@ -206,7 +235,7 @@ public class Section {
                     .getSequences());
             // Make sure no other cells with same content are highlighted
             // HashSet keeps the older value, but we want the new only.
-            if(entities.contains(entity)){
+            if (entities.contains(entity)) {
                 entities.remove(entity);
             }
             entities.add(entity);
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index 4a85444d..73526fbc 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -63,6 +63,7 @@ public class RedactionIntegrationTest {
     private static final String VERTEBRATES_CODE = "vertebrate";
     private static final String ADDRESS_CODE = "address";
     private static final String NAME_CODE = "name";
+    private static final String SPONSOR = "sponsor";
     private static final String NO_REDACTION_INDICATOR = "no_redaction_indicator";
     private static final String REDACTION_INDICATOR = "redaction_indicator";
     private static final String HINT_ONLY = "hint_only";
@@ -117,6 +118,7 @@ public class RedactionIntegrationTest {
         when(dictionaryClient.getDictionaryForType(VERTEBRATES_CODE)).thenReturn(getDictionaryResponse(VERTEBRATES_CODE));
         when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(getDictionaryResponse(ADDRESS_CODE));
         when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(getDictionaryResponse(NAME_CODE));
+        when(dictionaryClient.getDictionaryForType(SPONSOR)).thenReturn(getDictionaryResponse(SPONSOR));
         when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
         when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
         when(dictionaryClient.getDictionaryForType(HINT_ONLY)).thenReturn(getDictionaryResponse(HINT_ONLY));
@@ -132,6 +134,11 @@ public class RedactionIntegrationTest {
                         .stream()
                         .map(this::cleanDictionaryEntry)
                         .collect(Collectors.toSet()));
+        dictionary.computeIfAbsent(SPONSOR, v -> new ArrayList<>())
+                .addAll(ResourceLoader.load("dictionaries/sponsor_companies.txt")
+                        .stream()
+                        .map(this::cleanDictionaryEntry)
+                        .collect(Collectors.toSet()));
         dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new ArrayList<>())
                 .addAll(ResourceLoader.load("dictionaries/vertebrates.txt")
                         .stream()
@@ -176,6 +183,7 @@ public class RedactionIntegrationTest {
         typeColorMap.put(VERTEBRATES_CODE, new float[]{0, 1, 0});
         typeColorMap.put(ADDRESS_CODE, new float[]{0, 1, 1});
         typeColorMap.put(NAME_CODE, new float[]{1, 1, 0});
+        typeColorMap.put(SPONSOR, new float[]{.5f, .5f, .5f});
         typeColorMap.put(NO_REDACTION_INDICATOR, new float[]{0.8f, 0, 0.8f});
         typeColorMap.put(REDACTION_INDICATOR, new float[]{1, 0.502f, 0.1f});
         typeColorMap.put(HINT_ONLY, new float[]{0.8f, 1, 0.8f});
@@ -184,6 +192,7 @@ public class RedactionIntegrationTest {
         hintTypeMap.put(VERTEBRATES_CODE, true);
         hintTypeMap.put(ADDRESS_CODE, false);
         hintTypeMap.put(NAME_CODE, false);
+        hintTypeMap.put(SPONSOR, false);
         hintTypeMap.put(NO_REDACTION_INDICATOR, true);
         hintTypeMap.put(REDACTION_INDICATOR, true);
         hintTypeMap.put(HINT_ONLY, true);
@@ -192,6 +201,7 @@ public class RedactionIntegrationTest {
         caseInSensitiveMap.put(VERTEBRATES_CODE, true);
         caseInSensitiveMap.put(ADDRESS_CODE, false);
         caseInSensitiveMap.put(NAME_CODE, false);
+        caseInSensitiveMap.put(SPONSOR, false);
         caseInSensitiveMap.put(NO_REDACTION_INDICATOR, true);
         caseInSensitiveMap.put(REDACTION_INDICATOR, true);
         caseInSensitiveMap.put(HINT_ONLY, true);
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/sponsor_companies.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/sponsor_companies.txt
new file mode 100644
index 00000000..2b1652fa
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/sponsor_companies.txt
@@ -0,0 +1,7 @@
+Monthey Syngenta Crop Protection AG, Basel, Switzerland
+Syngenta Crop Protection, Monthey, Switzerland
+Fine Organics Limited, Middlesbrough, United Kingdom
+Syngenta Monthey Switzerland
+Hunan Haili Chemical Industry Co., Ltd., Hunan, China
+Syngenta, Switzerland
+Syngenta Nantong, China
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
index 2490aad2..2c31a62b 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
@@ -49,6 +49,7 @@ rule "5: Do not redact in guideline sections"
         section.redactNot("address", 5, "Section is a guideline section.");
     end
 
+
 rule "6: Redact contact information if applicant is found"
     when
         eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact"));
@@ -124,3 +125,10 @@ rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate
         section.redact("address", 10, "Redacted because row is a vertebrate study");
         section.highlightCell("Vertebrate study Y/N", 10, "must_redact");
     end
+
+rule "11: Redact sponsor company"
+    when
+        Section(section.getText().toLowerCase().contains("batches produced at"))
+    then
+        section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company");
+    end
\ No newline at end of file