From 608ea4bbcc3943dc8f37d3ad2254dd095edd50fd Mon Sep 17 00:00:00 2001 From: deiflaender Date: Thu, 3 Dec 2020 11:52:09 +0100 Subject: [PATCH] RED-824: Add author recommendations based on vertebrate study tables --- .../redaction/v1/model/RedactionLogEntry.java | 1 + .../redaction-service-server-v1/pom.xml | 2 +- .../redaction/model/DictionaryModel.java | 5 +- .../v1/server/redaction/model/Section.java | 50 ++++-- .../redaction/service/DictionaryService.java | 84 ++++++++-- .../service/EntityRedactionService.java | 68 +++++--- .../v1/server/redaction/utils/Patterns.java | 12 ++ .../service/AnnotationHighlightService.java | 5 + .../v1/server/RedactionIntegrationTest.java | 61 +++++++- .../service/EntityRedactionServiceTest.java | 31 +++- .../resources/dictionaries/CBI_author.txt | 2 +- .../resources/dictionaries/false_positive.txt | 2 + .../resources/dictionaries/must_redact.txt | 1 - .../recommendation_CBI_address.txt | 0 .../recommendation_CBI_author.txt | 0 .../resources/dictionaries/vertebrate.txt | 1 - .../src/test/resources/drools/rules.drl | 148 ++++++++++-------- 17 files changed, 353 insertions(+), 120 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt create mode 100644 redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index 61876708..ad6ea850 100644 --- 
a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -22,6 +22,7 @@ public class RedactionLogEntry { private String legalBasis; private boolean redacted; private boolean isHint; + private boolean isRecommendation; private String section; private float[] color; diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index df5aa5e9..1b837357 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -20,7 +20,7 @@ com.iqser.red.service configuration-service-api-v1 - 1.3.5 + 1.3.7 org.drools diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java index 0a5e1032..a8b050a1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java @@ -1,11 +1,11 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import java.util.Set; + import lombok.AllArgsConstructor; import lombok.Data; -import java.util.Set; - @Data @AllArgsConstructor public class DictionaryModel { @@ -15,6 +15,7 @@ public class DictionaryModel { private float[] color; private boolean caseInsensitive; private boolean hint; + private boolean recommendation; private Set entries; private Set localEntries; diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 640a4f4b..fd91d244 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,15 +1,19 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.RECOMMENDATION_PREFIX; + import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; +import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; import lombok.Builder; import lombok.Data; @@ -62,8 +66,11 @@ public class Section { public void redact(String type, int ruleNumber, String reason, String legalBasis) { + boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); + entities.forEach(entity -> { - if (entity.getType().equals(type)) { + if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() + .equals(RECOMMENDATION_PREFIX + type)) { entity.setRedaction(true); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -75,8 +82,11 @@ public class Section { public void redactNot(String type, int ruleNumber, String reason) { + boolean hasRecommendactionDictionary = dictionaryService.hasRecommendationDictionary(type); + entities.forEach(entity -> { - if 
(entity.getType().equals(type)) { + if (entity.getType().equals(type) || hasRecommendactionDictionary && entity.getType() + .equals(RECOMMENDATION_PREFIX + type)) { entity.setRedaction(false); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); @@ -247,24 +257,26 @@ public class Section { public void highlightCell(String cellHeader, int ruleNumber, String type) { - annotateCell(cellHeader, ruleNumber, type, false, null, null); + annotateCell(cellHeader, ruleNumber, type, false, false, null, null); } - public void redactCell(String cellHeader, int ruleNumber, String type, String reason, String legalBasis) { + public void redactCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, String reason, + String legalBasis) { - annotateCell(cellHeader, ruleNumber, type, true, reason, legalBasis); + annotateCell(cellHeader, ruleNumber, type, true, addAsRecommendations, reason, legalBasis); } - public void redactNotCell(String cellHeader, int ruleNumber, String type, String reason) { + public void redactNotCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, + String reason) { - annotateCell(cellHeader, ruleNumber, type, false, reason, null); + annotateCell(cellHeader, ruleNumber, type, false, addAsRecommendations, reason, null); } - private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, String reason, - String legalBasis) { + private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, + boolean addAsRecommendations, String reason, String legalBasis) { String cleanHeaderName = cellHeader.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", ""); @@ -273,6 +285,7 @@ public class Section { log.warn("Could not find any data for {}.", cellHeader); } else { String word = value.toString(); + Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber); 
entity.setRedaction(redact); entity.setMatchedRule(ruleNumber); @@ -286,6 +299,25 @@ public class Section { entities.add(entity); entities = removeEntitiesContainedInLarger(entities); + + if (addAsRecommendations) { + String cleanedWord = word.replaceAll(",", " ").replaceAll(" ", " ").trim() + " "; + Pattern pattern = Patterns.AUTHOR_TABLE_SPITTER; + Matcher matcher = pattern.matcher(cleanedWord); + + while (matcher.find()) { + String match = matcher.group().trim(); + if (match.length() >= 3) { + if(!dictionaryService.getDictionary(type).getEntries().contains(match) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(match)) { + dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, match); + } + String lastname = match.split(" ")[0]; + if(!dictionaryService.getDictionary(type).getEntries().contains(lastname) && !dictionaryService.getDictionary(RECOMMENDATION_PREFIX + type).getEntries().contains(lastname)) { + dictionaryService.addToLocalDictionary(RECOMMENDATION_PREFIX + type, lastname); + } + } + } + } } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index b493c5d2..df1f6ad2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -1,17 +1,5 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import com.iqser.red.service.configuration.v1.api.model.Colors; -import com.iqser.red.service.configuration.v1.api.model.TypeResponse; -import 
com.iqser.red.service.configuration.v1.api.model.TypeResult; -import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; -import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; -import feign.FeignException; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Service; - import java.awt.Color; import java.util.ArrayList; import java.util.Comparator; @@ -22,11 +10,27 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.stereotype.Service; + +import com.iqser.red.service.configuration.v1.api.model.Colors; +import com.iqser.red.service.configuration.v1.api.model.TypeResponse; +import com.iqser.red.service.configuration.v1.api.model.TypeResult; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; + +import feign.FeignException; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + @Slf4j @Service @RequiredArgsConstructor public class DictionaryService { + public static final String RECOMMENDATION_PREFIX = "recommendation_"; + private final DictionaryClient dictionaryClient; @Getter @@ -47,17 +51,24 @@ public class DictionaryService { @Getter private float[] notRedactedColor; + private Map localAccessMap = new HashMap<>(); + public boolean hasLocalEntries() { + return this.dictionary.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty()); } + public void addToLocalDictionary(String type, String value) { + localAccessMap.get(type).getLocalEntries().add(value); } + public void clearLocalEntries() { + this.dictionary.forEach(dm -> dm.getLocalEntries().clear()); } @@ -80,8 +91,8 @@ public class DictionaryService { dictionary = 
typeResponse.getTypes() .stream() - .map(t -> - new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), convertEntries(t), new HashSet<>())) + .map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t + .isHint(), t.isRecommendation(), convertEntries(t), new HashSet<>())) .sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()) .collect(Collectors.toList()); @@ -101,6 +112,19 @@ public class DictionaryService { } + public void updateExternalDictionary(){ + dictionary.forEach(dm -> { + if(dm.isRecommendation() && !dm.getLocalEntries().isEmpty()){ + dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false); + long externalVersion = dictionaryClient.getVersion(); + if(externalVersion == dictionaryVersion + 1){ + dictionaryVersion = externalVersion; + } + } + }); + } + + private Set convertEntries(TypeResult t) { if (t.isCaseInsensitive()) { @@ -121,7 +145,9 @@ public class DictionaryService { return new float[]{color.getRed() / 255f, color.getGreen() / 255f, color.getBlue() / 255f}; } + public boolean isCaseInsensitiveDictionary(String type) { + DictionaryModel dictionaryModel = localAccessMap.get(type); if (dictionaryModel != null) { return dictionaryModel.isCaseInsensitive(); @@ -129,7 +155,9 @@ public class DictionaryService { return false; } + public float[] getColor(String type) { + DictionaryModel model = localAccessMap.get(type); if (model != null) { return model.getColor(); @@ -137,11 +165,39 @@ public class DictionaryService { return defaultColor; } + public boolean isHint(String type) { + DictionaryModel model = localAccessMap.get(type); if (model != null) { return model.isHint(); } return false; } + + public boolean isRecommendation(String type) { + + DictionaryModel model = localAccessMap.get(type); + if (model != null) { + return model.isRecommendation(); + } + return false; + } + + + public boolean 
hasRecommendationDictionary(String type) { + + DictionaryModel model = localAccessMap.get(RECOMMENDATION_PREFIX + type); + if (model != null) { + return true; + } + return false; + } + + + public DictionaryModel getDictionary(String type) { + + return localAccessMap.get(type); + } + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index 1af41b03..7622e8dc 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,5 +1,19 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.stereotype.Service; + import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactions; import com.iqser.red.service.redaction.v1.model.Rectangle; @@ -14,18 +28,8 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; import com.iqser.red.service.redaction.v1.server.redaction.model.Section; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; 
-import lombok.RequiredArgsConstructor; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; +import lombok.RequiredArgsConstructor; @Service @RequiredArgsConstructor @@ -34,19 +38,31 @@ public class EntityRedactionService { private final DictionaryService dictionaryService; private final DroolsExecutionService droolsExecutionService; + public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) { dictionaryService.updateDictionary(); droolsExecutionService.updateRules(); dictionaryService.clearLocalEntries(); - Set documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false)); + Set documentEntities = new HashSet<>(findEntities(classifiedDoc, manualRedactions, false, null)); if (dictionaryService.hasLocalEntries()) { - Set foundByLocal = findEntities(classifiedDoc, manualRedactions, true); + + Map> hintsPerSectionNumber = new HashMap<>(); + documentEntities.stream().forEach(entity -> { + if (dictionaryService.isHint(entity.getType())) { + hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()) + .add(entity); + } + }); + + Set foundByLocal = findEntities(classifiedDoc, manualRedactions, true, hintsPerSectionNumber); // HashSet keeps the older value, but we want the new only. 
documentEntities.removeAll(foundByLocal); documentEntities.addAll(foundByLocal); + + removeEntitiesContainedInLarger(documentEntities); } for (Entity entity : documentEntities) { @@ -60,14 +76,18 @@ public class EntityRedactionService { classifiedDoc.getEntities() .computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>()) .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry - .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity.getLegalBasis())); + .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity + .getLegalBasis())); } } + dictionaryService.updateExternalDictionary(); } - private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries) { + private Set findEntities(Document classifiedDoc, ManualRedactions manualRedactions, boolean localEntries, + Map> hintsPerSectionNumber) { + Set documentEntities = new HashSet<>(); int sectionNumber = 1; for (Paragraph paragraph : classifiedDoc.getParagraphs()) { @@ -106,7 +126,9 @@ public class EntityRedactionService { Section analysedRowSection = droolsExecutionService.executeRules(Section.builder() .dictionaryService(dictionaryService) - .entities(rowEntities) + .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? 
Stream + .concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) + .collect(Collectors.toSet()) : rowEntities) .text(searchableRow.getAsStringWithLinebreaks()) .searchText(searchableRow.toString()) .headline(table.getHeadline()) @@ -124,7 +146,9 @@ public class EntityRedactionService { Set entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, localEntries); Section analysedSection = droolsExecutionService.executeRules(Section.builder() .dictionaryService(dictionaryService) - .entities(entities) + .entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber) ? Stream + .concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber).stream()) + .collect(Collectors.toSet()) : entities) .text(searchableText.getAsStringWithLinebreaks()) .searchText(searchableText.toString()) .headline(paragraph.getHeadline()) @@ -143,7 +167,10 @@ public class EntityRedactionService { removeEntitiesContainedInLarger(entities); for (Entity entity : entities) { - entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity.getType()), entity.getTargetSequences())); + if(entity.getPositionSequences().isEmpty()) { + entity.setPositionSequences(text.getSequences(entity.getWord(), dictionaryService.isCaseInsensitiveDictionary(entity + .getType()), entity.getTargetSequences())); + } } return entities; @@ -204,7 +231,7 @@ public class EntityRedactionService { for (Entity word : entities) { for (Entity inner : entities) { if (inner.getWord().length() < word.getWord() - .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) { + .length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) { wordsToRemove.add(inner); } } @@ -213,7 +240,8 @@ public class EntityRedactionService { } - private void 
addSectionToManualRedactions(List textBlocks, ManualRedactions manualRedactions, String section, int sectionNumber) { + private void addSectionToManualRedactions(List textBlocks, ManualRedactions manualRedactions, + String section, int sectionNumber) { if (manualRedactions == null || manualRedactions.getEntriesToAdd().isEmpty()) { return; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java new file mode 100644 index 00000000..3e20e767 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.redaction.v1.server.redaction.utils; + +import java.util.regex.Pattern; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class Patterns { + + public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2} )+"); + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java index f2f09acd..c412408a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/AnnotationHighlightService.java @@ -270,6 +270,7 @@ public class AnnotationHighlightService 
{ .type(entity.getType()) .redacted(entity.isRedaction()) .isHint(isHint(entity)) + .isRecommendation(isRecommendation(entity)) .section(entity.getHeadline()) .sectionNumber(entity.getSectionNumber()) .matchedRule(entity.getMatchedRule()) @@ -425,6 +426,10 @@ public class AnnotationHighlightService { return dictionaryService.isHint(entity.getType()); } + private boolean isRecommendation(Entity entity) { + return dictionaryService.isRecommendation(entity.getType()); + } + private void drawSectionFrames(PDDocument document, Document classifiedDoc, boolean flatRedaction, PDPage pdPage, int page) throws IOException { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 5f51f6e5..9f07a1c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -77,6 +77,11 @@ public class RedactionIntegrationTest { private static final String PUBLISHED_INFORMATION = "published_information"; private static final String TEST_METHOD = "test_method"; + private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author"; + private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address"; + + private static final String FALSE_POSITIVE = "false_positive"; + private static final String PII = "PII"; @Autowired @@ -92,6 +97,7 @@ public class RedactionIntegrationTest { private final Map typeColorMap = new HashMap<>(); private final Map hintTypeMap = new HashMap<>(); private final Map caseInSensitiveMap = new HashMap<>(); + private final Map recommendationTypeMap = new HashMap<>(); private final Colors 
colors = new Colors(); @TestConfiguration @@ -137,6 +143,9 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION)); when(dictionaryClient.getDictionaryForType(TEST_METHOD)).thenReturn(getDictionaryResponse(TEST_METHOD)); when(dictionaryClient.getDictionaryForType(PII)).thenReturn(getDictionaryResponse(PII)); + when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR)); + when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS)); + when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE)).thenReturn(getDictionaryResponse(FALSE_POSITIVE)); when(dictionaryClient.getColors()).thenReturn(colors); } @@ -198,6 +207,21 @@ public class RedactionIntegrationTest { .stream() .map(this::cleanDictionaryEntry) .collect(Collectors.toSet())); + dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); + dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/false_positive.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); } @@ -220,7 +244,9 @@ public class RedactionIntegrationTest { typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff"); typeColorMap.put(TEST_METHOD, "#91fae8"); typeColorMap.put(PII, "#66ccff"); - + typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c"); + typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c"); + typeColorMap.put(FALSE_POSITIVE, 
"#ffffff"); hintTypeMap.put(VERTEBRATE, true); hintTypeMap.put(ADDRESS, false); @@ -233,6 +259,9 @@ public class RedactionIntegrationTest { hintTypeMap.put(PUBLISHED_INFORMATION, true); hintTypeMap.put(TEST_METHOD, true); hintTypeMap.put(PII, false); + hintTypeMap.put(RECOMMENDATION_AUTHOR, false); + hintTypeMap.put(RECOMMENDATION_ADDRESS, false); + hintTypeMap.put(FALSE_POSITIVE, true); caseInSensitiveMap.put(VERTEBRATE, true); caseInSensitiveMap.put(ADDRESS, false); @@ -245,6 +274,24 @@ public class RedactionIntegrationTest { caseInSensitiveMap.put(PUBLISHED_INFORMATION, true); caseInSensitiveMap.put(TEST_METHOD, false); caseInSensitiveMap.put(PII, false); + caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false); + caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false); + caseInSensitiveMap.put(FALSE_POSITIVE, false); + + recommendationTypeMap.put(VERTEBRATE, false); + recommendationTypeMap.put(ADDRESS, false); + recommendationTypeMap.put(AUTHOR, false); + recommendationTypeMap.put(SPONSOR, false); + recommendationTypeMap.put(NO_REDACTION_INDICATOR, false); + recommendationTypeMap.put(REDACTION_INDICATOR, false); + recommendationTypeMap.put(HINT_ONLY, false); + recommendationTypeMap.put(MUST_REDACT, false); + recommendationTypeMap.put(PUBLISHED_INFORMATION, false); + recommendationTypeMap.put(TEST_METHOD, false); + recommendationTypeMap.put(PII, false); + recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true); + recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true); + recommendationTypeMap.put(FALSE_POSITIVE, false); colors.setDefaultColor("#acfc00"); colors.setNotRedacted("#cccccc"); @@ -262,6 +309,7 @@ public class RedactionIntegrationTest { .hexColor(typeColor.getValue()) .isHint(hintTypeMap.get(typeColor.getKey())) .isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey())) + .isRecommendation(recommendationTypeMap.get(typeColor.getKey())) .build()) .collect(Collectors.toList()); @@ -275,6 +323,7 @@ public class RedactionIntegrationTest { 
.entries(dictionary.get(type)) .isHint(hintTypeMap.get(type)) .isCaseInsensitive(caseInSensitiveMap.get(type)) + .isRecommendation(recommendationTypeMap.get(type)) .build(); } @@ -333,7 +382,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Primicarb/74 Pirimicarb_RAR_01_Volume_1_2017-12-04.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -342,6 +391,12 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.redact(request); + result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + if(entry.isRecommendation()){ + System.out.println(entry.getValue()); + } + }); + try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) { fileOutputStream.write(result.getDocument()); } @@ -467,7 +522,7 @@ public class RedactionIntegrationTest { public void htmlTablesTest() throws IOException { System.out.println("htmlTablesTest"); - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/line_breaks.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index 0d9cf1ab..f15143c8 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -14,8 +14,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.apache.pdfbox.pdmodel.PDDocument; @@ -446,7 +449,7 @@ public class EntityRedactionServiceTest { " when\n" + " Section(rowEquals(\"Vertebrate study Y/N\", \"N\") || rowEquals(\"Vertebrate study Y/N\", \"No\"))\n" + " then\n" + - " section.redactNotCell(\"Author(s)\", 8, \"name\", \"Not redacted because row is not a vertebrate study\");\n" + + " section.redactNotCell(\"Author(s)\", 8, \"name\", false, \"Not redacted because row is not a vertebrate study\");\n" + " section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" + " section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n" + " end\n" + @@ -455,7 +458,7 @@ public class EntityRedactionServiceTest { " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\") || rowEquals(\"Vertebrate study Y/N\", " + "\"Yes\"))\n" + " then\n" + - " section.redactCell(\"Author(s)\", 9, \"name\", \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + + " section.redactCell(\"Author(s)\", 9, \"name\", false, \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + " section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\", \"Reg (EC) No" + " 1107/2009 Art. 
63 (2g)\");\n" + " section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" + @@ -510,4 +513,28 @@ public class EntityRedactionServiceTest { } } + + @Test + public void testAuthorSplitting(){ + + String word = "Porch JR, " + "Kendall TZ, " + "Krueger HO"; + + word = word.replaceAll(",", " ").replaceAll(" ", " "); // String is immutable: keep the normalized text + + Pattern pattern = Pattern.compile("[A-ZÄÖÜ][\\wäöüéèê]{2,}( [A-ZÄÖÜ]{1,2}\\.)+"); + Matcher matcher = pattern.matcher(word); + + List<String> allMatches = new ArrayList<>(); + while (matcher.find()) { + allMatches.add(matcher.group()); + } + + for(String name: allMatches) { + if(name.length() >= 3) { + System.out.println(name); +// dictionaryService.addToLocalDictionary(type, name); + } + } + } + } \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt index 0e697600..766ebd06 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt @@ -8586,4 +8586,4 @@ Zoriki Hosomi R. Zoriki Hosomi Rosana Zuberer D Zubrod J -Zwicker R.E. +Zwicker R.E. 
\ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt new file mode 100644 index 00000000..95da0f83 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/false_positive.txt @@ -0,0 +1,2 @@ +Long-term +Brown liquid \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt index 832fdb5b..e69de29b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt @@ -1 +0,0 @@ -determination of residues diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_address.txt new file mode 100644 index 00000000..e69de29b diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/recommendation_CBI_author.txt new file mode 100644 index 00000000..e69de29b diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrate.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrate.txt index 890fef06..207e8a49 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrate.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/vertebrate.txt @@ -167,7 +167,6 @@ 
sheepshead minnow sheepshead minnows shrew shrews -Singh sorex araneus spea multiplicata spotted march frog diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index fa8dd95e..3684402e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -56,7 +56,8 @@ rule "6: Not redacted because Vertebrate Study = N" when Section(rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")) then - section.redactNotCell("Author(s)", 6, "CBI_author", "Not redacted because row is not a vertebrate study"); + section.redactNotCell("Author(s)", 6, "CBI_author", true, "Not redacted because row is not a vertebrate study"); + section.redactNot("CBI_author", 6, "Not redacted because row is not a vertebrate study"); section.redactNot("CBI_address", 6, "Not redacted because row is not a vertebrate study"); section.highlightCell("Vertebrate study Y/N", 6, "hint_only"); end @@ -75,7 +76,7 @@ rule "8: Redact Authors and Addresses in Reference Table if it is a Vertebrate s when Section(rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes")) then - section.redactCell("Author(s)", 8, "CBI_author", "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + section.redactCell("Author(s)", 8, "CBI_author", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); section.redact("CBI_address", 8, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); section.highlightCell("Vertebrate study Y/N", 8, "must_redact"); end @@ -95,10 +96,12 @@ rule "10: Redact determination of residues" Section(searchText.toLowerCase.contains("determination of residues") && ( searchText.toLowerCase.contains("livestock") || searchText.toLowerCase.contains("live stock") || - searchText.toLowerCase.contains("egg") || - searchText.toLowerCase.contains("milk") || + searchText.toLowerCase.contains("tissue") || + searchText.toLowerCase.contains("liver") || + searchText.toLowerCase.contains("muscle") || searchText.toLowerCase.contains("bovine") || - searchText.toLowerCase.contains("ruminant") + searchText.toLowerCase.contains("ruminant") || + searchText.toLowerCase.contains("ruminants") )) then section.redact("CBI_author", 10, "Determination of residues was found.", "Reg (EC) No 1107/2009 Art. 63 (2g)"); @@ -106,25 +109,38 @@ rule "10: Redact determination of residues" section.addHintAnnotation("determination of residues", "must_redact"); section.addHintAnnotation("livestock", "must_redact"); section.addHintAnnotation("live stock", "must_redact"); - section.addHintAnnotation("egg", "must_redact"); - section.addHintAnnotation("milk", "must_redact"); + section.addHintAnnotation("tissue", "must_redact"); + section.addHintAnnotation("liver", "must_redact"); + section.addHintAnnotation("muscle", "must_redact"); section.addHintAnnotation("bovine", "must_redact"); section.addHintAnnotation("ruminant", "must_redact"); + section.addHintAnnotation("ruminants", "must_redact"); end +rule "11: Redact if CTL/* or BL/* was found" + when + Section(searchText.contains("CTL/") || searchText.contains("BL/")) + then + section.redact("CBI_author", 11, "Laboratory for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 63 (2g)"); + section.redact("CBI_address", 11, "Laboratory for vertebrate studies found", "Reg (EC) No 1107/2009 Art. 
63 (2g)"); + section.addHintAnnotation("CTL", "must_redact"); + section.addHintAnnotation("BL", "must_redact"); + end + + // --------------------------------------- PII rules ------------------------------------------------------------------- -rule "11: Redacted PII Personal Identification Information" +rule "12: Redacted PII Personal Identification Information" when Section(matchesType("PII")) then - section.redact("PII", 11, "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redact("PII", 12, "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end -rule "12: Redact contact information" +rule "13: Redact contact information" when Section(text.contains("Contact point:") || text.contains("Phone:") @@ -142,96 +158,96 @@ rule "12: Redact contact information" || text.contains("Telephone:") || text.contains("European contact:")) then - section.redactLineAfter("Contact point:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Phone:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Fax:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Tel.:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Tel:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("E-mail:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Email:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("e-mail:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); - section.redactLineAfter("E-mail address:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Contact:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Alternative contact:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone number:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone No:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Fax number:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactBetween("No:", "Fax", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactBetween("Contact:", "Tel.:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("European contact:", "PII", 12, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Contact point:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Phone:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Tel.:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Tel:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); + section.redactLineAfter("E-mail:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Email:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("e-mail:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("E-mail address:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Alternative contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone number:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone No:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax number:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("No:", "Fax", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("Contact:", "Tel.:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("European contact:", "PII", 13, true, "Contact information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); end -rule "13: Redact contact information if applicant is found" +rule "14: Redact contact information if applicant is found" when Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact") || text.contains("Contact:") || text.contains("Telephone number:")) then - section.redactLineAfter("Contact point:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Phone:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Fax:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Tel.:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Tel:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("E-mail:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Email:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("e-mail:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("E-mail address:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Contact:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Alternative contact:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone number:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); - section.redactLineAfter("Telephone No:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Fax number:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactBetween("No:", "Fax", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactBetween("Contact:", "Tel.:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("European contact:", "PII", 13, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Contact point:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Phone:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Tel.:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Tel:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("E-mail:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Email:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("e-mail:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("E-mail address:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); + section.redactLineAfter("Contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Alternative contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone number:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone No:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax number:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("No:", "Fax", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("Contact:", "Tel.:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("European contact:", "PII", 14, true, "Applicant information was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end -rule "14: Redact contact information if Producer is found" +rule "15: Redact contact information if Producer is found" when Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance")) then - section.redactLineAfter("Contact:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Phone:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); - section.redactLineAfter("Fax:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("E-mail:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Contact:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Fax number:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Telephone number:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactLineAfter("Tel:", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); - section.redactBetween("No:", "Fax", "PII", 14, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Contact:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Phone:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("E-mail:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Contact:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Fax number:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Telephone number:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLineAfter("Tel:", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("No:", "Fax", "PII", 15, true, "Producer was found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); end -rule "15: Redact AUTHOR(S)" +rule "16: Redact AUTHOR(S)" when Section(searchText.contains("AUTHOR(S):")) then - section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 15, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 16, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end -rule "16: Redact PERFORMING LABORATORY" +rule "17: Redact PERFORMING LABORATORY" when Section(searchText.contains("PERFORMING LABORATORY:")) then - section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", 16, true, "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "PII", 17, true, "PERFORMING LABORATORY was found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end -rule "17: Redact On behalf of Sequani Ltd.:" +rule "18: Redact On behalf of Sequani Ltd.:" when Section(searchText.contains("On behalf of Sequani Ltd.: Name Title")) then - section.redactBetween("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", 17, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", 18, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); end -rule "18: Redact On behalf of Syngenta Ltd.:" +rule "19: Redact On behalf of Syngenta Ltd.:" when Section(searchText.contains("On behalf of Syngenta Ltd.: Name Title")) then - section.redactBetween("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", 18, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 63 (2e)"); + section.redactBetween("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", 19, false , "PII (Personal Identification Information) found", "Reg (EC) No 1107/2009 Art. 
63 (2e)"); end \ No newline at end of file