RED-101: Detect vertebrate study row value
This commit is contained in:
parent
06630b09d2
commit
00c96c6f57
@ -7,6 +7,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import lombok.Builder;
|
||||
@ -100,7 +101,7 @@ public class Section {
|
||||
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (value != null && StringUtils.isNotBlank(value)) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
@ -160,14 +161,22 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void highlightCell(String reason) {
|
||||
public void highlightCell(String cellHeader, int ruleNumber) {
|
||||
|
||||
String value = tabularData.get(reason);
|
||||
String value = tabularData.get(cellHeader);
|
||||
if (value == null) {
|
||||
log.warn("Could not find any data for {}.", reason);
|
||||
log.warn("Could not find any data for {}.", cellHeader);
|
||||
} else {
|
||||
Entity entity = findEntities(value, "some type").iterator().next();
|
||||
entity.setRedaction(false);
|
||||
Set<Entity> found = findEntities(value, "must_redact");
|
||||
if (CollectionUtils.isEmpty(found)) {
|
||||
log.warn("Could not identify value {} in row.", value);
|
||||
} else {
|
||||
Entity entity = found.iterator().next();
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(cellHeader);
|
||||
entities.add(entity);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -117,16 +117,16 @@ public class EntityRedactionServiceTest {
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" +
|
||||
"rule \"8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
" when\n" +
|
||||
" Section(tabularData != null && tabularData.size() > 0\n" +
|
||||
" && tabularData.containsKey(\"Vertebrate\\nstudy Y/N\")\n" +
|
||||
" && tabularData.get(\"Vertebrate\\nstudy Y/N\").equals(\"Y\")\n" +
|
||||
" )\n" +
|
||||
" then\n" +
|
||||
" section.redact(\"name\", 8, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.redact(\"address\", 8, \"Redacted because rows is a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate\\nstudy Y/N\");\n" +
|
||||
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate\\nstudy Y/N\", 9);\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(1L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
@ -149,7 +149,7 @@ public class EntityRedactionServiceTest {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(4); // 4 out of 5 entities recognized on page 1
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1, plus (presumably) the highlighted "Vertebrate study Y/N" cell
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -106,7 +106,7 @@ rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate
|
||||
&& tabularData.get("Vertebrate\nstudy Y/N").equals("Y")
|
||||
)
|
||||
then
|
||||
section.redact("name", 8, "Redacted because row is a vertebrate study");
|
||||
section.redact("address", 8, "Redacted because rows is a vertebrate study");
|
||||
section.highlightCell("Vertebrate\nstudy Y/N");
|
||||
section.redact("name", 9, "Redacted because row is a vertebrate study");
|
||||
section.redact("address", 9, "Redacted because rows is a vertebrate study");
|
||||
section.highlightCell("Vertebrate\nstudy Y/N", 9);
|
||||
end
|
||||
Loading…
x
Reference in New Issue
Block a user