RED-101: Detect vertebrate study row value

This commit is contained in:
Thierry Göckel 2020-08-10 13:30:10 +02:00
parent 06630b09d2
commit 00c96c6f57
3 changed files with 23 additions and 14 deletions

View File

@ -7,6 +7,7 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import lombok.Builder;
@ -100,7 +101,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (value != null && StringUtils.isNotBlank(value)) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType);
entities.addAll(found);
}
@ -160,14 +161,22 @@ public class Section {
}
public void highlightCell(String reason) {
public void highlightCell(String cellHeader, int ruleNumber) {
String value = tabularData.get(reason);
String value = tabularData.get(cellHeader);
if (value == null) {
log.warn("Could not find any data for {}.", reason);
log.warn("Could not find any data for {}.", cellHeader);
} else {
Entity entity = findEntities(value, "some type").iterator().next();
entity.setRedaction(false);
Set<Entity> found = findEntities(value, "must_redact");
if (CollectionUtils.isEmpty(found)) {
log.warn("Could not identify value {} in row.", value);
} else {
Entity entity = found.iterator().next();
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(cellHeader);
entities.add(entity);
}
}
}

View File

@ -117,16 +117,16 @@ public class EntityRedactionServiceTest {
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
"\n" +
"global Section section\n" +
"rule \"8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
" when\n" +
" Section(tabularData != null && tabularData.size() > 0\n" +
" && tabularData.containsKey(\"Vertebrate\\nstudy Y/N\")\n" +
" && tabularData.get(\"Vertebrate\\nstudy Y/N\").equals(\"Y\")\n" +
" )\n" +
" then\n" +
" section.redact(\"name\", 8, \"Redacted because row is a vertebrate study\");\n" +
" section.redact(\"address\", 8, \"Redacted because rows is a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate\\nstudy Y/N\");\n" +
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
" section.highlightCell(\"Vertebrate\\nstudy Y/N\", 9);\n" +
" end";
when(rulesClient.getVersion()).thenReturn(1L);
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
@ -149,7 +149,7 @@ public class EntityRedactionServiceTest {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(4); // 4 out of 5 entities recognized on page 1
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 5 entities expected on page 1; presumably the 4 previously recognized entities plus the cell entity added by highlightCell (confirm)
}
}

View File

@ -106,7 +106,7 @@ rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate
&& tabularData.get("Vertebrate\nstudy Y/N").equals("Y")
)
then
section.redact("name", 8, "Redacted because row is a vertebrate study");
section.redact("address", 8, "Redacted because rows is a vertebrate study");
section.highlightCell("Vertebrate\nstudy Y/N");
section.redact("name", 9, "Redacted because row is a vertebrate study");
section.redact("address", 9, "Redacted because rows is a vertebrate study");
section.highlightCell("Vertebrate\nstudy Y/N", 9);
end