RED-101: Add features as requested in PR

- Add rule for non-vertebrate studies
- Don't highlight ambiguous cell values
- Rewrite rules in more human-readable fashion
This commit is contained in:
Thierry Göckel 2020-08-18 20:35:40 +02:00
parent c7f5b4a280
commit 1fff4f7eb0
10 changed files with 216 additions and 140 deletions

View File

@ -3,6 +3,8 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Data;
import lombok.EqualsAndHashCode;
@ -16,6 +18,7 @@ public class Entity {
private boolean redaction;
private String redactionReason;
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
private List<TextPositionSequence> targetSequences;
private Integer start;
private Integer end;

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
@ -25,9 +26,16 @@ public class SearchableText {
}
/**
 * Searches the whole text for the given string.
 *
 * <p>Convenience overload: delegates to the three-argument variant with a
 * {@code null} sub-list, which makes that variant use all sequences of this
 * text as its search space. No checkstyle suppression is needed here because
 * this method contains no loop of its own.
 *
 * @param searchString the string to look for
 * @param caseInsensitive {@code true} to match on the lower-cased search string
 * @return the matches produced by the three-argument overload
 */
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive) {
    return getSequences(searchString, caseInsensitive, null);
}
@SuppressWarnings("checkstyle:ModifiedControlVariable")
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive,
List<TextPositionSequence> sequencesSubList) {
String normalizedSearchString;
if (caseInsensitive) {
normalizedSearchString = searchString.toLowerCase();
@ -40,37 +48,50 @@ public class SearchableText {
List<TextPositionSequence> crossSequenceParts = new ArrayList<>();
List<EntityPositionSequence> finalMatches = new ArrayList<>();
for (int i = 0; i < sequences.size(); i++) {
TextPositionSequence partMatch = new TextPositionSequence(sequences.get(i).getPage());
for (int j = 0; j < sequences.get(i).length(); j++) {
if (i > 0 && j == 0 && sequences.get(i).charAt(0, caseInsensitive) == ' ' && sequences.get(i - 1)
.charAt(sequences.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && sequences.get(i)
.charAt(j, caseInsensitive) == ' ' && sequences.get(i).charAt(j - 1, caseInsensitive) == ' ') {
if (j == sequences.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
List<TextPositionSequence> searchSpace;
if (sequencesSubList != null) {
int subListIndex = Collections.indexOfSubList(sequences, sequencesSubList);
if (subListIndex != -1) {
searchSpace = sequences.subList(subListIndex, subListIndex + sequencesSubList.size());
} else {
searchSpace = sequences;
}
} else {
searchSpace = sequences;
}
for (int i = 0; i < searchSpace.size(); i++) {
TextPositionSequence partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
for (int j = 0; j < searchSpace.get(i).length(); j++) {
if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i).charAt(j - 1, caseInsensitive) == ' ') {
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
crossSequenceParts.add(partMatch);
}
continue;
}
if (j == 0 && sequences.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && sequences.get(i - 1)
.charAt(sequences.get(i - 1)
if (j == 0 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1)
.length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
counter++;
}
if (sequences.get(i)
.charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && sequences.get(i)
if (searchSpace.get(i)
.charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i)
.charAt(j, caseInsensitive) == '-') {
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i)
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1)
.charAt(sequences.get(i - 1)
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && sequences.get(i - 1)
.charAt(sequences.get(i - 1).length() - 1, caseInsensitive) != ' ' && sequences.get(i)
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(searchSpace.get(i)
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1)
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i)
.charAt(j, caseInsensitive) != ' ') {
partMatch.add(sequences.get(i).textPositionAt(j));
if (!(j == sequences.get(i).length() - 1 && sequences.get(i)
partMatch.add(searchSpace.get(i).textPositionAt(j));
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
.charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
counter++;
}
@ -79,19 +100,19 @@ public class SearchableText {
if (counter == searchString.length()) {
crossSequenceParts.add(partMatch);
if (i == sequences.size() - 1 && j == sequences.get(i).length() - 1 || j != sequences.get(i)
.length() - 1 && isSeparator(sequences.get(i)
.charAt(j + 1, caseInsensitive)) || j == sequences.get(i)
.length() - 1 && isSeparator(sequences.get(i + 1)
.charAt(0, caseInsensitive)) || j == sequences.get(i).length() - 1 && sequences.get(i)
.charAt(j, caseInsensitive) != ' ' && sequences.get(i + 1)
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
.length() - 1 && isSeparator(searchSpace.get(i)
.charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
.length() - 1 && isSeparator(searchSpace.get(i + 1)
.charAt(0, caseInsensitive)) || j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1)
.charAt(0, caseInsensitive) != ' ') {
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts));
}
counter = 0;
crossSequenceParts = new ArrayList<>();
partMatch = new TextPositionSequence(sequences.get(i).getPage());
partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
}
} else {
counter = 0;
@ -99,16 +120,17 @@ public class SearchableText {
j--;
}
crossSequenceParts = new ArrayList<>();
partMatch = new TextPositionSequence(sequences.get(i).getPage());
partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
}
if (j == sequences.get(i).length() - 1 && counter != 0) {
if (j == searchSpace.get(i).length() - 1 && counter != 0) {
crossSequenceParts.add(partMatch);
}
}
}
return finalMatches;
}

View File

@ -7,9 +7,10 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import lombok.Builder;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
@ -31,7 +32,21 @@ public class Section {
private int sectionNumber;
private Map<String, String> tabularData;
private Map<String, TextBlock> tabularData;
/**
 * Tells whether the tabular data of this section flags a vertebrate study.
 *
 * @return {@code true} only if a "Vertebrate study Y/N" cell is present and
 *         its text is exactly "Y"; {@code false} in every other case
 */
public boolean isVertebrateStudy() {
    // A missing map or a missing/null cell means "not flagged" instead of an NPE.
    if (tabularData == null || tabularData.get("Vertebrate study Y/N") == null) {
        return false;
    }
    // Constant-first equals stays safe even if the cell carries no text.
    return "Y".equals(tabularData.get("Vertebrate study Y/N").getText());
}
/**
 * Tells whether the tabular data of this section explicitly flags a
 * non-vertebrate study.
 *
 * <p>This is not the negation of {@code isVertebrateStudy()}: when the cell is
 * absent or holds anything other than "N", this method returns {@code false}.
 *
 * @return {@code true} only if a "Vertebrate study Y/N" cell is present and
 *         its text is exactly "N"; {@code false} in every other case
 */
public boolean isNotVertebrateStudy() {
    // A missing map or a missing/null cell means "not flagged" instead of an NPE.
    if (tabularData == null || tabularData.get("Vertebrate study Y/N") == null) {
        return false;
    }
    // Constant-first equals stays safe even if the cell carries no text.
    return "N".equals(tabularData.get("Vertebrate study Y/N").getText());
}
public boolean contains(String type) {
@ -163,20 +178,16 @@ public class Section {
public void highlightCell(String cellHeader, int ruleNumber) {
String value = tabularData.get(cellHeader);
TextBlock value = tabularData.get(cellHeader);
if (value == null) {
log.warn("Could not find any data for {}.", cellHeader);
} else {
Set<Entity> found = findEntities(value, "must_redact");
if (CollectionUtils.isEmpty(found)) {
log.warn("Could not identify value {} in row.", value);
} else {
Entity entity = found.iterator().next();
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(cellHeader);
entities.add(entity);
}
Entity entity = new Entity(value.getText(), "must_redact", 0, value.getText().length(), headline, sectionNumber);
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(cellHeader);
entity.setTargetSequences(value.getSequences()); // Make sure no other cells with same content are highlighted
entities.add(entity);
}
}

View File

@ -53,7 +53,7 @@ public class EntityRedactionService {
for (Table table : tables) {
for (List<Cell> row : table.getRows()) {
SearchableText searchableRow = new SearchableText();
Map<String, String> tabularData = new HashMap<>();
Map<String, TextBlock> tabularData = new HashMap<>();
for (Cell cell : row) {
if (cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
continue;
@ -63,11 +63,12 @@ public class EntityRedactionService {
String headerName = headerCell.getTextBlocks().get(0).getText()
.replaceAll("\n", " ")
.replaceAll(" ", " ");
tabularData.put(headerName, cell.getTextBlocks().get(0).getText());
tabularData.put(headerName, cell.getTextBlocks().get(0));
});
for (TextBlock textBlock : cell.getTextBlocks()) {
searchableRow.addAll(textBlock.getSequences());
}
}
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber);
@ -124,9 +125,9 @@ public class EntityRedactionService {
for (Entity entity : entities) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
entity.setPositionSequences(text.getSequences(entity.getWord(), true));
entity.setPositionSequences(text.getSequences(entity.getWord(), true, entity.getTargetSequences()));
} else {
entity.setPositionSequences(text.getSequences(entity.getWord(), false));
entity.setPositionSequences(text.getSequences(entity.getWord(), false, entity.getTargetSequences()));
}
}

View File

@ -247,40 +247,29 @@ public class Table extends AbstractTextContainer {
List<Cell> row = rowsOfCells.get(i);
Iterator<Cell> rowCells = row.iterator();
int startColumn = 0;
// int jumpToColumn = 0;
int jumpToColumn = 0;
while (rowCells.hasNext()) {
Cell cell = rowCells.next();
if (i > 0) {
// Rectangle rectangle = new Rectangle(cell.getBottom(),
// si.getBounds().getLeft(),
// cell.getLeft() - si.getBounds().getLeft() + 1,
// si.getBounds().getBottom() - cell.getBottom());
// List<List<Cell>> others = rowsOfCells(si.contains(rectangle));
//
// for (List<Cell> r : others) {
// jumpToColumn = Math.max(jumpToColumn, r.size());
// }
//
// while (startColumn != jumpToColumn) {
// add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
// startColumn++;
// }
Rectangle rectangle = new Rectangle(cell.getBottom(),
si.getBounds().getLeft(),
cell.getLeft() - si.getBounds().getLeft() + 1,
si.getBounds().getBottom() - cell.getBottom());
List<List<Cell>> others = rowsOfCells(si.contains(rectangle));
// Handle cells spanning several rows
while (previousNonNullCellForColumnIndex.get(startColumn) != null) {
Cell previouslyAddedCellForSameColumn = previousNonNullCellForColumnIndex.get(startColumn);
float previousRight = previouslyAddedCellForSameColumn.getRight();
float thisLeft = cell.getLeft();
if (previousRight > thisLeft) {
break;
}
for (List<Cell> r : others) {
jumpToColumn = Math.max(jumpToColumn, r.size());
}
while (startColumn != jumpToColumn) {
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
startColumn++;
}
}
add(cell, i, startColumn);
previousNonNullCellForColumnIndex.put(startColumn, cell);
startColumn++;
// jumpToColumn = startColumn;
jumpToColumn = startColumn;
}
}
}

View File

@ -227,6 +227,7 @@ public class RedactionIntegrationTest {
@Test
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
System.out.println("noExceptionShouldBeThrownForAnyFiles");
ClassLoader loader = getClass().getClassLoader();
URL url = loader.getResource("files");
File[] files = new File(url.getPath()).listFiles();
@ -266,6 +267,7 @@ public class RedactionIntegrationTest {
@Test
public void redactionTest() throws IOException {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/96 Trinexapac-ethyl_RAR_09_Volume_3CA_B-7_2018-02-23.pdf");
@ -289,8 +291,9 @@ public class RedactionIntegrationTest {
@Test
public void testTableRedaction() throws IOException {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -311,6 +314,7 @@ public class RedactionIntegrationTest {
@Test
public void testManualRedaction() throws IOException {
System.out.println("testManualRedaction");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
@ -345,6 +349,7 @@ public class RedactionIntegrationTest {
@Test
public void classificationTest() throws IOException {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
@ -363,6 +368,7 @@ public class RedactionIntegrationTest {
@Test
public void sectionsTest() throws IOException {
System.out.println("sectionsTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
@ -381,6 +387,7 @@ public class RedactionIntegrationTest {
@Test
public void htmlTablesTest() throws IOException {
System.out.println("htmlTablesTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
@ -399,6 +406,7 @@ public class RedactionIntegrationTest {
@Test
public void htmlTableRotationTest() throws IOException {
System.out.println("htmlTableRotationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" +
"-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");

View File

@ -15,6 +15,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
@ -47,14 +48,15 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
@RunWith(SpringRunner.class)
@SpringBootTest
@RunWith(SpringRunner.class)
public class EntityRedactionServiceTest {
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
private static final String NAME_CODE = "name";
private static final String ADDRESS_CODE = "address";
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
@MockBean
private DictionaryClient dictionaryClient;
@ -117,6 +119,7 @@ public class EntityRedactionServiceTest {
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
.build();
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
@ -133,45 +136,80 @@ public class EntityRedactionServiceTest {
@Test
public void complexTable() throws IOException {
public void testTrueNegativesInTable() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Complex Table.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
" Supplement - Identity of the active substance - Reference list.pdf");
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE))
.thenReturn(DictionaryResponse.builder().entries(new ArrayList<>()).build());
when(dictionaryClient.getDictionaryForType(NAME_CODE))
.thenReturn(DictionaryResponse.builder().entries(new ArrayList<>()).build());
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
}
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
"the plant protection product.pdf");
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
}
}
/**
 * Processes the "Row With Ambiguous Redaction" sample PDF with the name and
 * address dictionaries loaded from the class path and checks how many
 * entities were matched by rule 9.
 */
@Test
public void testFalsePositiveInWrongCell() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
// Every test hands out a fresh version number.
// NOTE(review): presumably this forces the service to reload the mocked dictionaries - confirm.
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
// Stub the dictionary lookups for both entity types with the full resource lists.
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
.build();
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page only
// Exactly ten entities under key 1 must carry matched rule 9.
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 9)
.count()).isEqualTo(10);
}
}
@Test
public void headerPropagation() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
RedactionRequest redactionRequest = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(Arrays.asList("Bissig R.", "Thanei P."))
.build();
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, null);
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
@ -190,10 +228,7 @@ public class EntityRedactionServiceTest {
"global Section section\n" +
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
" when\n" +
" Section(tabularData != null\n" +
" && tabularData.containsKey(\"Vertebrate study Y/N\")\n" +
" && tabularData.get(\"Vertebrate study Y/N\").equals(\"Y\")\n" +
" )\n" +
" Section(isVertebrateStudy())\n" +
" then\n" +
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
@ -206,10 +241,12 @@ public class EntityRedactionServiceTest {
TypeResult.builder().type(NAME_CODE).color(new float[]{1, 1, 0}).build(),
TypeResult.builder().type(ADDRESS_CODE).color(new float[]{0, 1, 1}).build()))
.build();
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getAllTypes()).thenReturn(typeResponse);
when(dictionaryClient.getDefaultColor()).thenReturn(new DefaultColor());
}
private static String loadFromClassPath(String path) {
URL resource = ResourceLoader.class.getClassLoader().getResource(path);

View File

@ -49,64 +49,69 @@ rule "5: Do not redact in guideline sections"
section.redactNot("address", 5, "Section is a guideline section.");
end
rule "6: Redact if must redact entry is found"
when
eval(section.contains("must_redact")==true);
then
section.redact("name", 6, "must_redact entry was found.");
section.redact("address", 6, "must_redact entry was found.");
end
rule "7: Redact contact information, if applicant is found"
rule "6: Redact contact information, if applicant is found"
when
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant"));
then
section.redactLineAfter("Name:", "address", 7, "Applicant information was found");
section.redactBetween("Address:", "Contact", "address", 7, "Applicant information was found");
section.redactLineAfter("Contact point:", "address", 7, "Applicant information was found");
section.redactLineAfter("Phone:", "address", 7, "Applicant information was found");
section.redactLineAfter("Fax:", "address", 7, "Applicant information was found");
section.redactLineAfter("Tel.:", "address", 7, "Applicant information was found");
section.redactLineAfter("Tel:", "address", 7, "Applicant information was found");
section.redactLineAfter("E-mail:", "address", 7, "Applicant information was found");
section.redactLineAfter("Email:", "address", 7, "Applicant information was found");
section.redactLineAfter("Contact:", "address", 7, "Applicant information was found");
section.redactLineAfter("Telephone number:", "address", 7, "Applicant information was found");
section.redactLineAfter("Fax number:", "address", 7, "Applicant information was found");
section.redactLineAfter("Telephone:", "address", 7, "Applicant information was found");
section.redactBetween("No:", "Fax", "address", 7, "Applicant information was found");
section.redactBetween("Contact:", "Tel.:", "address", 7, "Applicant information was found");
section.redactLineAfter("Name:", "address", 6, "Applicant information was found");
section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found");
section.redactLineAfter("Contact point:", "address", 6, "Applicant information was found");
section.redactLineAfter("Phone:", "address", 6, "Applicant information was found");
section.redactLineAfter("Fax:", "address", 6, "Applicant information was found");
section.redactLineAfter("Tel.:", "address", 6, "Applicant information was found");
section.redactLineAfter("Tel:", "address", 6, "Applicant information was found");
section.redactLineAfter("E-mail:", "address", 6, "Applicant information was found");
section.redactLineAfter("Email:", "address", 6, "Applicant information was found");
section.redactLineAfter("Contact:", "address", 6, "Applicant information was found");
section.redactLineAfter("Telephone number:", "address", 6, "Applicant information was found");
section.redactLineAfter("Fax number:", "address", 6, "Applicant information was found");
section.redactLineAfter("Telephone:", "address", 6, "Applicant information was found");
section.redactBetween("No:", "Fax", "address", 6, "Applicant information was found");
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found");
end
rule "8: Redact contact information, if Producer is found"
rule "7: Redact contact information, if Producer is found"
when
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));
then
section.redactLineAfter("Name:", "address", 8, "Producer was found");
section.redactBetween("Address:", "Contact", "address", 8, "Producer was found");
section.redactBetween("Contact:", "Phone", "address", 8, "Producer was found");
section.redactBetween("Contact:", "Telephone number:", "address", 8, "Producer was found");
section.redactBetween("Address:", "Manufacturing", "address", 8, "Producer was found");
section.redactLineAfter("Telephone:", "address", 8, "Producer was found");
section.redactLineAfter("Phone:", "address", 8, "Producer was found");
section.redactLineAfter("Fax:", "address", 8, "Producer was found");
section.redactLineAfter("E-mail:", "address", 8, "Producer was found");
section.redactLineAfter("Contact:", "address", 8, "Producer was found");
section.redactLineAfter("Fax number:", "address", 8, "Producer was found");
section.redactLineAfter("Telephone number:", "address", 8, "Producer was found");
section.redactLineAfter("Tel:", "address", 8, "Producer was found");
section.redactBetween("No:", "Fax", "address", 8, "Producer was found");
section.redactLineAfter("Name:", "address", 7, "Producer was found");
section.redactBetween("Address:", "Contact", "address", 7, "Producer was found");
section.redactBetween("Contact:", "Phone", "address", 7, "Producer was found");
section.redactBetween("Contact:", "Telephone number:", "address", 7, "Producer was found");
section.redactBetween("Address:", "Manufacturing", "address", 7, "Producer was found");
section.redactLineAfter("Telephone:", "address", 7, "Producer was found");
section.redactLineAfter("Phone:", "address", 7, "Producer was found");
section.redactLineAfter("Fax:", "address", 7, "Producer was found");
section.redactLineAfter("E-mail:", "address", 7, "Producer was found");
section.redactLineAfter("Contact:", "address", 7, "Producer was found");
section.redactLineAfter("Fax number:", "address", 7, "Producer was found");
section.redactLineAfter("Telephone number:", "address", 7, "Producer was found");
section.redactLineAfter("Tel:", "address", 7, "Producer was found");
section.redactBetween("No:", "Fax", "address", 7, "Producer was found");
end
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
rule "8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
when
Section(tabularData != null
&& tabularData.containsKey("Vertebrate study Y/N")
&& tabularData.get("Vertebrate study Y/N").equals("Y")
)
Section(isVertebrateStudy())
then
section.redact("name", 9, "Redacted because row is a vertebrate study");
section.redact("address", 9, "Redacted because rows is a vertebrate study");
section.redact("name", 8, "Redacted because row is a vertebrate study");
section.redact("address", 8, "Redacted because row is a vertebrate study");
section.highlightCell("Vertebrate study Y/N", 9);
end
end
// Fires for a Section whose isNotVertebrateStudy() is true, i.e. the
// "Vertebrate study Y/N" cell of the row reads "N".
// Calls redactNot for "name" and "address", recording rule number 9 and the reason text.
rule "9: Not redacted because Vertebrate Study = N"
when
Section(isNotVertebrateStudy())
then
section.redactNot("name", 9, "Not redacted because row is not a vertebrate study");
section.redactNot("address", 9, "Not redacted because row is not a vertebrate study");
end
// Fires when the section contains at least one entity of type "must_redact".
// Calls redact for "name" and "address", recording rule number 10 and the reason text.
rule "10: Redact if must redact entry is found"
when
// contains(...) already returns a boolean, so no "==true" comparison is needed.
eval(section.contains("must_redact"));
then
section.redact("name", 10, "must_redact entry was found.");
section.redact("address", 10, "must_redact entry was found.");
end