RED-101: Implement table cell and row redaction

This commit is contained in:
Thierry Göckel 2020-08-10 12:15:19 +02:00
parent 695564d162
commit 06630b09d2
8 changed files with 367 additions and 65 deletions

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
@ -10,8 +11,10 @@ import org.apache.commons.lang3.StringUtils;
import lombok.Builder;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
@Data
@Slf4j
@Builder
public class Section {
@ -27,6 +30,8 @@ public class Section {
private int sectionNumber;
private Map<String, String> tabularData;
public boolean contains(String type) {
@ -71,7 +76,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntity(value.trim(), asType);
Set<Entity> found = findEntities(value.trim(), asType);
entities.addAll(found);
}
}
@ -96,7 +101,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (value != null && StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntity(value.trim(), asType);
Set<Entity> found = findEntities(value.trim(), asType);
entities.addAll(found);
}
}
@ -113,7 +118,7 @@ public class Section {
}
private Set<Entity> findEntity(String value, String asType) {
private Set<Entity> findEntities(String value, String asType) {
Set<Entity> found = new HashSet<>();
@ -154,4 +159,17 @@ public class Section {
return entities;
}
}
/**
 * Looks up the table cell stored under the given key in {@code tabularData}, searches entities in
 * its text and marks the first entity found as not to be redacted.
 * <p>
 * Nothing happens (apart from a warning being logged) when this section carries no tabular data,
 * when no cell is stored under the key, or when no entity is found in the cell text.
 * <p>
 * NOTE(review): the entity type passed to {@code findEntities} is the placeholder "some type", and
 * the entity is not added to any collection in this method - confirm this is intended.
 *
 * @param reason key of the cell in {@code tabularData}; the rules visible in this change pass the
 *               table header text of the cell
 */
public void highlightCell(String reason) {
    // Sections built without tabular data have no map at all, so guard before the lookup.
    String value = tabularData == null ? null : tabularData.get(reason);
    if (value == null) {
        log.warn("Could not find any data for {}.", reason);
        return;
    }

    Set<Entity> found = findEntities(value, "some type");
    if (found.isEmpty()) {
        // iterator().next() on an empty set would throw NoSuchElementException and abort rule execution
        log.warn("Could not find any entity in the data for {}.", reason);
        return;
    }
    found.iterator().next().setRedaction(false);
}
}

View File

@ -7,7 +7,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
@ -44,24 +47,30 @@ public class EntityRedactionService {
List<Table> tables = paragraph.getTables();
for (Table table : tables) {
List<String> metadata = table.getHeaders();
for (List<Cell> row : table.getRows()) {
SearchableText searchableRow = new SearchableText();
for (Cell column : row) {
if (column == null || column.getTextBlocks() == null) {
List<String> cellValues = new ArrayList<>();
for (Cell cell : row) {
if (cell == null || cell.getTextBlocks() == null) {
cellValues.add(null);
continue;
}
for (TextBlock textBlock : column.getTextBlocks()) {
cellValues.add(cell.getTextBlocks().get(0).getText());
for (TextBlock textBlock : cell.getTextBlocks()) {
searchableRow.addAll(textBlock.getSequences());
}
}
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber);
Map<String, String> tabularData = toMap(metadata, cellValues);
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
.entities(rowEntities)
.text(searchableRow.getAsStringWithLinebreaks())
.searchText(searchableRow.toString())
.headline(table.getHeadline())
.sectionNumber(sectionNumber)
.tabularData(tabularData)
.build());
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow));
@ -93,7 +102,8 @@ public class EntityRedactionService {
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
classifiedDoc.getEntities()
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(),
entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber()));
}
}
@ -101,6 +111,12 @@ public class EntityRedactionService {
}
/**
 * Pairs up header names and cell values by position.
 * <p>
 * Only the positions present in both lists are considered. Pairs with a {@code null} key or a
 * {@code null} value are left out (cells without text are recorded as {@code null} by the caller),
 * and when a header occurs more than once the first occurrence wins.
 *
 * @param keys   header names, may be {@code null}
 * @param values cell values in the same order as {@code keys}, may be {@code null}
 * @return a map from header name to cell value; empty when either list is {@code null} or empty
 */
private Map<String, String> toMap(List<String> keys, List<String> values) {
    int pairCount = (keys == null || values == null) ? 0 : Math.min(keys.size(), values.size());
    // Lambdas instead of keys::get / values::get: a method reference evaluates its receiver eagerly
    // and would throw for a null list even though the range below is empty in that case.
    return IntStream.range(0, pairCount)
            .filter(i -> keys.get(i) != null && values.get(i) != null)
            .boxed()
            .collect(Collectors.toMap(i -> keys.get(i), i -> values.get(i), (first, duplicate) -> first));
}
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) {
removeEntitiesContainedInLarger(entities);
@ -119,12 +135,14 @@ public class EntityRedactionService {
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber) {
Set<Entity> found = new HashSet<>();
if (StringUtils.isEmpty(searchableText.toString()) && StringUtils.isEmpty(headline)) {
return found;
}
String inputString = searchableText.toString();
String lowercaseInputString = inputString.toLowerCase();
Set<Entity> found = new HashSet<>();
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber));
} else {
@ -151,7 +169,8 @@ public class EntityRedactionService {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber));
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex,
headline, sectionNumber));
}
} while (startIndex > -1);
}

View File

@ -4,6 +4,8 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
@ -14,7 +16,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@Service
@SuppressWarnings("all")
public class SectionsBuilderService {
public void buildSections(Document document) {
@ -25,6 +26,7 @@ public class SectionsBuilderService {
AbstractTextContainer prev = null;
String lastHeadline = "";
Table previousTable = null;
for (Page page : document.getPages()) {
for (AbstractTextContainer current : page.getTextBlocks()) {
@ -37,7 +39,7 @@ public class SectionsBuilderService {
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
chunkBlock.setHeadline(lastHeadline);
lastHeadline = current.getText();
chunkBlockList.add(chunkBlock);
@ -51,17 +53,15 @@ public class SectionsBuilderService {
}
}
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
if (chunkBlock != null) {
chunkBlockList.add(chunkBlock);
chunkBlock.setHeadline(lastHeadline);
}
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
chunkBlock.setHeadline(lastHeadline);
chunkBlockList.add(chunkBlock);
document.setParagraphs(chunkBlockList);
}
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline) {
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline, Table previousTable) {
Paragraph paragraph = new Paragraph();
TextBlock textBlock = null;
@ -76,19 +76,28 @@ public class SectionsBuilderService {
AbstractTextContainer container = itty.next();
if (container instanceof Table) {
Table table = (Table) container;
splitByTable = true;
if (previous != null && previous instanceof TextBlock && previous.getText().startsWith("Table ")) {
((Table) container).setHeadline(previous.getText());
if (previous != null && previous.getText().startsWith("Table ")) {
table.setHeadline(previous.getText());
} else {
((Table) container).setHeadline("Table in: " + lastHeadline);
table.setHeadline("Table in: " + lastHeadline);
}
// Distribute header information for subsequent tables
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
if ((previousTable.isVerticalHeader() && previousTable.getRowCount() == table.getRowCount()) || previousTable
.getColCount() == table.getColCount()) {
table.setHeaders(previousTable.getHeaders());
}
}
if (textBlock != null && !alreadyAdded) {
paragraph.getPageBlocks().add(textBlock);
alreadyAdded = true;
}
paragraph.getPageBlocks().add(container);
paragraph.getPageBlocks().add(table);
previousTable = table;
continue;
}
@ -125,4 +134,24 @@ public class SectionsBuilderService {
return paragraph;
}
}
/**
 * Tells whether the given table carries usable header information, i.e. the exact opposite of
 * {@code hasInvalidHeaderInformation(table)}.
 */
private boolean hasValidHeaderInformation(Table table) {
    boolean invalid = hasInvalidHeaderInformation(table);
    return !invalid;
}
/**
 * Tells whether the header information of the given table is unusable: the header list is
 * {@code null} or empty, or at least one of its entries is {@code null} or an empty string.
 */
private boolean hasInvalidHeaderInformation(Table table) {
    List<String> headers = table.getHeaders();
    return CollectionUtils.isEmpty(headers) || headers.stream().anyMatch(StringUtils::isEmpty);
}
}

View File

@ -8,32 +8,44 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
@SuppressWarnings("all")
@Slf4j
public class Table extends AbstractTextContainer {
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
private RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
@Getter
@Setter
private String headline;
@Getter
private int rowCount = 0;
private int rowCount;
@Getter
private int colCount = 0;
private int colCount;
private int rotation = 0;
private final int rotation;
private List<List<Cell>> memoizedRows = null;
private List<List<Cell>> rows;
@Getter
@Setter
private List<String> headers;
@Getter
private boolean verticalHeader;
public Table(List<Cell> cells, Rectangle area, int rotation) {
@ -47,16 +59,83 @@ public class Table extends AbstractTextContainer {
}
/**
 * Returns the rows of this table, computing and caching them on first access.
 * <p>
 * When the rows are computed, {@code headers} is assigned from {@code computeHeaders()} as well,
 * replacing whatever value that field held before.
 */
public List<List<Cell>> getRows() {
if (memoizedRows == null) {
memoizedRows = computeRows();
if (rows == null) {
rows = computeRows();
// computeHeaders() reads the rows field, so it has to run after the assignment above
headers = computeHeaders();
}
return memoizedRows;
return rows;
}
/**
 * Detects the header cells of this table, which are either its first row or its first column.
 * <p>
 * The first row is used when every cell in it that has text blocks is bold. Otherwise the first
 * column is used when the first cell of every row is bold (or has no text blocks); in that case
 * {@code verticalHeader} is set. If neither is entirely bold, the first row is used as the default.
 *
 * @return the text of the first text block of each header cell, with {@code null} for cells
 *         without text blocks; an empty list when the table has no rows
 */
private List<String> computeHeaders() {

    if (CollectionUtils.isEmpty(rows)) {
        // Nothing to inspect; rows.get(0) would throw for a table without rows.
        return new ArrayList<>();
    }

    List<Cell> firstRow = rows.get(0);
    if (allBoldOrEmpty(firstRow)) {
        log.info("Headers are in first row.");
        return headerTexts(firstRow);
    }

    List<Cell> firstColumn = new ArrayList<>(rows.size());
    for (List<Cell> row : rows) {
        // A row without cells contributes an empty header instead of failing on row.get(0).
        firstColumn.add(CollectionUtils.isEmpty(row) ? null : row.get(0));
    }

    if (allBoldOrEmpty(firstColumn)) {
        log.info("Headers are in first column");
        verticalHeader = true;
        return headerTexts(firstColumn);
    }

    log.info("Headers are defaulted in first row.");
    return headerTexts(firstRow);
}


/**
 * Checks that every cell that has text blocks reports "bold" as the most popular word style of its
 * first text block. Missing cells and cells without text blocks do not count against the result.
 */
private static boolean allBoldOrEmpty(List<Cell> cells) {

    for (Cell cell : cells) {
        if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks())) {
            continue;
        }
        // Constant-first equals so that a missing style counts as "not bold" instead of throwing.
        if (!"bold".equals(cell.getTextBlocks().get(0).getMostPopularWordStyle())) {
            return false;
        }
    }
    return true;
}


/**
 * Maps each cell to the text of its first text block, or to {@code null} when the cell is missing
 * or has no text blocks.
 */
private static List<String> headerTexts(List<Cell> cells) {

    List<String> texts = new ArrayList<>(cells.size());
    for (Cell cell : cells) {
        if (cell != null && CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
            texts.add(cell.getTextBlocks().get(0).getText());
        } else {
            texts.add(null);
        }
    }
    return texts;
}
private List<List<Cell>> computeRows() {
List<List<Cell>> rows = new ArrayList<>();
@ -93,6 +172,7 @@ public class Table extends AbstractTextContainer {
}
public void add(Cell chunk, int row, int col) {
rowCount = Math.max(rowCount, row + 1);
@ -103,6 +183,7 @@ public class Table extends AbstractTextContainer {
}
private void addCells(List<Cell> cells) {
if (cells.isEmpty()) {
@ -131,14 +212,9 @@ public class Table extends AbstractTextContainer {
while (rowCells.hasNext()) {
Cell cell = rowCells.next();
if (i > 0) {
List<List<Cell>> others = rowsOfCells(
si.contains(
new Rectangle(cell.getBottom(),
si.getBounds().getLeft(),
cell.getLeft() - si.getBounds().getLeft() + 1,
si.getBounds().getBottom() - cell.getBottom()
)
));
List<List<Cell>> others = rowsOfCells(si.contains(new Rectangle(cell.getBottom(), si.getBounds()
.getLeft(), cell.getLeft() - si.getBounds().getLeft() + 1, si.getBounds().getBottom() - cell
.getBottom())));
for (List<Cell> r : others) {
jumpToColumn = Math.max(jumpToColumn, r.size());
@ -158,7 +234,9 @@ public class Table extends AbstractTextContainer {
}
}
private static List<List<Cell>> rowsOfCells(List<Cell> cells) {
Cell c;
float lastTop;
List<List<Cell>> rv = new ArrayList<>();
@ -168,19 +246,10 @@ public class Table extends AbstractTextContainer {
return rv;
}
Collections.sort(cells, new Comparator<Cell>() {
@Override
public int compare(Cell arg0, Cell arg1) {
return Double.compare(arg0.getLeft(), arg1.getLeft());
}
});
cells.sort(Comparator.comparingDouble(Rectangle::getLeft));
Collections.sort(cells, Collections.reverseOrder(new Comparator<Cell>() {
@Override
public int compare(Cell arg0, Cell arg1) {
return Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(),2));
}
}));
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1
.getBottom(), 2))));
Iterator<Cell> iter = cells.iterator();
c = iter.next();
@ -201,6 +270,7 @@ public class Table extends AbstractTextContainer {
return rv;
}
@Override
public String getText() {
@ -237,6 +307,7 @@ public class Table extends AbstractTextContainer {
return sb.toString();
}
public String getTextAsHtml() {
StringBuilder sb = new StringBuilder();
@ -270,22 +341,30 @@ public class Table extends AbstractTextContainer {
return sb.toString();
}
class CellPosition implements Comparable<CellPosition> {
static class CellPosition implements Comparable<CellPosition> {
/**
 * Creates a position from the given row and column values.
 *
 * @param row row value stored in this position
 * @param col column value stored in this position
 */
CellPosition(int row, int col) {
    this.col = col;
    this.row = row;
}
final int row, col;
final int row;
final int col;
/**
 * Combines row and column into one hash value; the column is weighted by 101.
 */
@Override
public int hashCode() {
    final int columnWeight = 101;
    return columnWeight * col + row;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
@ -299,10 +378,12 @@ public class Table extends AbstractTextContainer {
return row == other.row && col == other.col;
}
/**
 * Orders positions by row first and by column second.
 *
 * @param other the position to compare with
 * @return a negative value, zero or a positive value when this position sorts before, at the same
 *         place as, or after {@code other}
 */
@Override
public int compareTo(CellPosition other) {
    // Integer.compare instead of subtraction: same sign for all inputs, but cannot overflow.
    int rowDiff = Integer.compare(row, other.row);
    return rowDiff != 0 ? rowDiff : Integer.compare(col, other.col);
}
}

View File

@ -19,7 +19,6 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
@ -48,7 +47,6 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@Ignore
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = DEFINED_PORT)
public class RedactionIntegrationTest {
@ -99,7 +97,7 @@ public class RedactionIntegrationTest {
@Before
public void stubRulesClient() {
public void stubClients() {
when(rulesClient.getVersion()).thenReturn(0L);
when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES));
@ -241,6 +239,27 @@ public class RedactionIntegrationTest {
System.out.println("numberOfPages: " + result.getNumberOfPages());
}
/**
 * Redacts the single-table example document through the controller, writes the result to
 * /tmp/Redacted.pdf for manual inspection and prints the duration and page count.
 */
@Test
public void testTableRedaction() throws IOException {
    long start = System.currentTimeMillis();
    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");

    // IOUtils.toByteArray does not close its argument, so the resource stream is closed here.
    // Fully qualified because the import list of this file is not fully visible.
    byte[] pdfBytes;
    try (java.io.InputStream pdfStream = pdfFileResource.getInputStream()) {
        pdfBytes = IOUtils.toByteArray(pdfStream);
    }

    RedactionRequest request = RedactionRequest.builder()
            .document(pdfBytes)
            .build();

    RedactionResult result = redactionController.redact(request);

    try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
        fileOutputStream.write(result.getDocument());
    }
    long end = System.currentTimeMillis();

    System.out.println("duration: " + (end - start));
    System.out.println("numberOfPages: " + result.getNumberOfPages());
}
@Test
public void classificationTest() throws IOException {

View File

@ -1,36 +1,91 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import com.iqser.red.service.configuration.v1.api.model.DefaultColor;
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
@RunWith(SpringRunner.class)
@SpringBootTest
public class EntityRedactionServiceTest {
@MockBean
private KieContainer kieContainer;
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
private static final String NAME_CODE = "name";
private static final String ADDRESS_CODE = "address";
@MockBean
private DroolsExecutionService droolsExecutionService;
private DictionaryClient dictionaryClient;
@MockBean
private DictionaryService dictionaryService;
private RulesClient rulesClient;
@Autowired
private EntityRedactionService entityRedactionService;
@Autowired
private PdfSegmentationService pdfSegmentationService;
/**
 * Test configuration that provides a {@link KieContainer} built from the rules text held in
 * {@code DEFAULT_RULES}.
 */
@TestConfiguration
public static class RedactionIntegrationTestConfiguration {

    /**
     * Writes the default rules into an in-memory KIE file system, builds it and returns a
     * container for the resulting module.
     */
    @Bean
    public KieContainer kieContainer() {

        KieServices services = KieServices.Factory.get();

        InputStream rulesStream = new ByteArrayInputStream(DEFAULT_RULES.getBytes(StandardCharsets.UTF_8));
        KieFileSystem fileSystem = services.newKieFileSystem();
        fileSystem.write("src/test/resources/drools/rules.drl",
                services.getResources().newInputStreamResource(rulesStream));

        KieBuilder builder = services.newKieBuilder(fileSystem);
        builder.buildAll();

        return services.newKieContainer(builder.getKieModule().getReleaseId());
    }

}
@Test
public void testNestedEntitiesRemoval() {
@ -47,4 +102,74 @@ public class EntityRedactionServiceTest {
}
/**
 * Processes the single-table example document with a rule that redacts names and addresses in
 * rows whose "Vertebrate study Y/N" cell is "Y", and checks the entities reported for page 1.
 */
@Test
public void testTableRedaction() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");

    // IOUtils.toByteArray does not close its argument, so the resource stream is closed here.
    byte[] pdfBytes;
    try (InputStream pdfStream = pdfFileResource.getInputStream()) {
        pdfBytes = IOUtils.toByteArray(pdfStream);
    }

    RedactionRequest redactionRequest = RedactionRequest.builder()
            .document(pdfBytes)
            .build();

    String tableRules = "package drools\n" +
            "\n" +
            "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
            "\n" +
            "global Section section\n" +
            "rule \"8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
            " when\n" +
            " Section(tabularData != null && tabularData.size() > 0\n" +
            " && tabularData.containsKey(\"Vertebrate\\nstudy Y/N\")\n" +
            " && tabularData.get(\"Vertebrate\\nstudy Y/N\").equals(\"Y\")\n" +
            " )\n" +
            " then\n" +
            " section.redact(\"name\", 8, \"Redacted because row is a vertebrate study\");\n" +
            " section.redact(\"address\", 8, \"Redacted because rows is a vertebrate study\");\n" +
            " section.highlightCell(\"Vertebrate\\nstudy Y/N\");\n" +
            " end";
    when(rulesClient.getVersion()).thenReturn(1L);
    when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));

    // Dictionary stubs: two entity types with one dictionary each.
    TypeResponse typeResponse = TypeResponse.builder()
            .types(Arrays.asList(
                    TypeResult.builder().type(NAME_CODE).color(new float[]{1, 1, 0}).build(),
                    TypeResult.builder().type(ADDRESS_CODE).color(new float[]{0, 1, 1}).build()))
            .build();
    when(dictionaryClient.getAllTypes()).thenReturn(typeResponse);

    DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
            .entries(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
            .build();
    when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);

    DictionaryResponse addressResponse = DictionaryResponse.builder()
            .entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
            .build();
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
    when(dictionaryClient.getDefaultColor()).thenReturn(new DefaultColor());

    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
        Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
        entityRedactionService.processDocument(classifiedDoc);
        assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
        assertThat(classifiedDoc.getEntities().get(1)).hasSize(4); // 4 out of 5 entities recognized on page 1
    }
}
/**
 * Reads a classpath resource as UTF-8 text. Every line of the resource is terminated with a single
 * {@code "\n"} in the result, including the last one.
 *
 * @param path classpath-relative location of the resource
 * @return the content of the resource
 * @throws IllegalArgumentException if the resource does not exist or cannot be read
 */
private static String loadFromClassPath(String path) {

    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
    if (resource == null) {
        // Report the path that was actually requested, not a fixed file name.
        throw new IllegalArgumentException("could not load classpath resource: " + path);
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String str;
        while ((str = br.readLine()) != null) {
            sb.append(str).append("\n");
        }
        return sb.toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("could not load classpath resource: " + path, e);
    }
}
}

View File

@ -99,3 +99,14 @@ rule "8: Redact contact information, if Producer is found"
section.redactBetween("No:", "Fax", "address", 8, "Producer was found");
end
// Fires for a section whose tabular data has the value "Y" under the header "Vertebrate\nstudy Y/N".
// Redacts the "name" and "address" entities of that section and highlights the matching cell.
// The rule number passed to redact(...) matches the number in the rule name (9), so that these
// redactions are not attributed to rule "8: Redact contact information, if Producer is found".
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
    when
        Section(tabularData != null && tabularData.size() > 0
            && tabularData.containsKey("Vertebrate\nstudy Y/N")
            && tabularData.get("Vertebrate\nstudy Y/N").equals("Y")
        )
    then
        section.redact("name", 9, "Redacted because row is a vertebrate study");
        section.redact("address", 9, "Redacted because rows is a vertebrate study");
        section.highlightCell("Vertebrate\nstudy Y/N");
    end