Pull request #20: RED-101
Merge in RED/redaction-service from RED-101 to master.
* commit 'c93ca745fc61fc2d7f1a1f474a4e3c464091e70d':
  - Normalize header information
  - Fix test and suppress checkstyle warnings
  - Fix PMD errors
  - RED-101: Detect vertebrate study row value
  - RED-101: Implement table cell and row redaction
  - Fix style
This commit is contained in:
commit
96ba93f774
@ -10,7 +10,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class Paragraph {
|
||||
@ -18,10 +17,12 @@ public class Paragraph {
|
||||
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
|
||||
private String headline;
|
||||
|
||||
public SearchableText getSearchableText(){
|
||||
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
SearchableText searchableText = new SearchableText();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof TextBlock){
|
||||
if (block instanceof TextBlock) {
|
||||
searchableText.addAll(((TextBlock) block).getSequences());
|
||||
}
|
||||
});
|
||||
@ -29,14 +30,15 @@ public class Paragraph {
|
||||
}
|
||||
|
||||
|
||||
public List<Table> getTables(){
|
||||
public List<Table> getTables() {
|
||||
|
||||
List<Table> tables = new ArrayList<>();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof Table){
|
||||
if (block instanceof Table) {
|
||||
tables.add((Table) block);
|
||||
}
|
||||
});
|
||||
return tables;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -5,43 +5,45 @@ import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
 * Counts how often each string value has been recorded and reports the most frequent one.
 */
public class StringFrequencyCounter {

    /** Number of recorded occurrences per value. */
    private final Map<String, Integer> countPerValue = new HashMap<>();


    /**
     * Returns the occurrence counts collected so far.
     *
     * @return the live backing map (not a copy); changes made through it are visible to this counter
     */
    public Map<String, Integer> getCountPerValue() {

        return countPerValue;
    }


    /**
     * Records one occurrence of the given value.
     *
     * @param value the value to count
     */
    public void add(String value) {

        // merge() inserts 1 for an unseen value and otherwise adds 1 to the existing count.
        countPerValue.merge(value, 1, Integer::sum);
    }


    /**
     * Adds all counts of another counter's map to this counter.
     *
     * @param otherCounter occurrence counts to add, keyed by value
     */
    public void addAll(Map<String, Integer> otherCounter) {

        for (Map.Entry<String, Integer> entry : otherCounter.entrySet()) {
            countPerValue.merge(entry.getKey(), entry.getValue(), Integer::sum);
        }
    }


    /**
     * Returns the value with the highest count.
     *
     * @return the most frequent value, or {@code null} if nothing has been recorded; when several
     * values share the highest count, the one encountered first while iterating the map wins
     */
    public String getMostPopular() {

        Map.Entry<String, Integer> mostPopular = null;
        for (Map.Entry<String, Integer> entry : countPerValue.entrySet()) {
            // Strictly greater: an equal count never replaces the current candidate.
            if (mostPopular == null || entry.getValue() > mostPopular.getValue()) {
                mostPopular = entry;
            }
        }
        return mostPopular != null ? mostPopular.getKey() : null;
    }

}
|
||||
@ -29,20 +29,16 @@ public class BlockificationService {
|
||||
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
|
||||
TextPositionSequence prev = null;
|
||||
|
||||
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
|
||||
if (prev != null &&
|
||||
(lineSeparation
|
||||
|| startFromTop
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines)
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines)
|
||||
)) {
|
||||
if (prev != null && (lineSeparation || startFromTop || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), verticalRulingLines) || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), verticalRulingLines))) {
|
||||
|
||||
TextBlock cb1 = buildTextBlock(chunkWords);
|
||||
chunkBlockList1.add(cb1);
|
||||
@ -100,11 +96,12 @@ public class BlockificationService {
|
||||
styleFrequencyCounter.add(wordBlock.getFontStyle());
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock
|
||||
.getRotation());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
|
||||
spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
|
||||
.getHeight());
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,6 +119,7 @@ public class BlockificationService {
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
return true;
|
||||
@ -133,7 +131,6 @@ public class BlockificationService {
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
@ -147,7 +144,6 @@ public class BlockificationService {
|
||||
|
||||
for (AbstractTextContainer container : page.getTextBlocks()) {
|
||||
|
||||
|
||||
if (container instanceof TextBlock) {
|
||||
TextBlock textBlock = (TextBlock) container;
|
||||
if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) {
|
||||
@ -179,16 +175,15 @@ public class BlockificationService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell column : row) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (column == null || column.getTextBlocks() == null) {
|
||||
if (cell == null || cell.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
@ -211,5 +206,4 @@ public class BlockificationService {
|
||||
return new Rectangle(minY, minX, maxX - minX, maxY - minY);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -8,10 +8,9 @@ import java.util.regex.Pattern;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class SearchableText {
|
||||
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(TextPositionSequence textPositionSequence) {
|
||||
@ -26,6 +25,7 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("checkstyle:ModifiedControlVariable")
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive) {
|
||||
|
||||
String normalizedSearchString;
|
||||
@ -163,7 +163,7 @@ public class SearchableText {
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
.replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
|
||||
@ -187,4 +187,4 @@ public class SearchableText {
|
||||
return sb.append("\n").toString();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -3,15 +3,19 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Data
|
||||
@Slf4j
|
||||
@Builder
|
||||
public class Section {
|
||||
|
||||
@ -27,6 +31,8 @@ public class Section {
|
||||
|
||||
private int sectionNumber;
|
||||
|
||||
private Map<String, String> tabularData;
|
||||
|
||||
|
||||
public boolean contains(String type) {
|
||||
|
||||
@ -71,7 +77,7 @@ public class Section {
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
Set<Entity> found = findEntities(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
}
|
||||
@ -95,8 +101,8 @@ public class Section {
|
||||
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
if (value != null && StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
Set<Entity> found = findEntities(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
}
|
||||
@ -113,7 +119,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntity(String value, String asType) {
|
||||
private Set<Entity> findEntities(String value, String asType) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
@ -154,4 +160,25 @@ public class Section {
|
||||
return entities;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void highlightCell(String cellHeader, int ruleNumber) {
|
||||
|
||||
String value = tabularData.get(cellHeader);
|
||||
if (value == null) {
|
||||
log.warn("Could not find any data for {}.", cellHeader);
|
||||
} else {
|
||||
Set<Entity> found = findEntities(value, "must_redact");
|
||||
if (CollectionUtils.isEmpty(found)) {
|
||||
log.warn("Could not identify value {} in row.", value);
|
||||
} else {
|
||||
Entity entity = found.iterator().next();
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(cellHeader);
|
||||
entities.add(entity);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -8,6 +8,8 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
@ -44,24 +46,30 @@ public class EntityRedactionService {
|
||||
List<Table> tables = paragraph.getTables();
|
||||
|
||||
for (Table table : tables) {
|
||||
List<String> metadata = table.getHeaders();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
for (Cell column : row) {
|
||||
if (column == null || column.getTextBlocks() == null) {
|
||||
List<String> cellValues = new ArrayList<>();
|
||||
for (Cell cell : row) {
|
||||
if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
cellValues.add(null);
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
cellValues.add(cell.getTextBlocks().get(0).getText());
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber);
|
||||
|
||||
Map<String, String> tabularData = toMap(metadata, cellValues);
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
.headline(table.getHeadline())
|
||||
.sectionNumber(sectionNumber)
|
||||
.tabularData(tabularData)
|
||||
.build());
|
||||
|
||||
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), searchableRow));
|
||||
@ -93,7 +101,8 @@ public class EntityRedactionService {
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
classifiedDoc.getEntities()
|
||||
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(),
|
||||
entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber()));
|
||||
}
|
||||
}
|
||||
@ -101,6 +110,21 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Map<String, String> toMap(List<String> keys, List<String> values) {
|
||||
|
||||
if (keys.size() != values.size()) {
|
||||
throw new RuntimeException("Cannot merge lists of unequal size.");
|
||||
}
|
||||
Map<String, String> result = new HashMap<>();
|
||||
for (int i = 0; i < keys.size(); i++) {
|
||||
result.put(keys.get(i), values.get(i));
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) {
|
||||
|
||||
removeEntitiesContainedInLarger(entities);
|
||||
@ -119,12 +143,14 @@ public class EntityRedactionService {
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
if (StringUtils.isEmpty(searchableText.toString()) && StringUtils.isEmpty(headline)) {
|
||||
return found;
|
||||
}
|
||||
|
||||
String inputString = searchableText.toString();
|
||||
String lowercaseInputString = inputString.toLowerCase();
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
|
||||
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entry.getKey())) {
|
||||
found.addAll(find(lowercaseInputString, entry.getValue(), entry.getKey(), headline, sectionNumber));
|
||||
} else {
|
||||
@ -151,7 +177,8 @@ public class EntityRedactionService {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber));
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex,
|
||||
headline, sectionNumber));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
|
||||
@ -29,7 +29,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@SuppressWarnings("PMD")
|
||||
public class PdfSegmentationService {
|
||||
|
||||
private final RulingCleaningService rulingCleaningService;
|
||||
|
||||
@ -4,6 +4,8 @@ import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
@ -14,7 +16,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class SectionsBuilderService {
|
||||
|
||||
public void buildSections(Document document) {
|
||||
@ -25,6 +26,7 @@ public class SectionsBuilderService {
|
||||
AbstractTextContainer prev = null;
|
||||
|
||||
String lastHeadline = "";
|
||||
Table previousTable = null;
|
||||
for (Page page : document.getPages()) {
|
||||
for (AbstractTextContainer current : page.getTextBlocks()) {
|
||||
|
||||
@ -36,32 +38,30 @@ public class SectionsBuilderService {
|
||||
current.setPage(page.getPageNumber());
|
||||
|
||||
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
lastHeadline = current.getText();
|
||||
if (CollectionUtils.isNotEmpty(chunkBlock.getTables())) {
|
||||
previousTable = chunkBlock.getTables().get(0);
|
||||
}
|
||||
chunkBlockList.add(chunkBlock);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
chunkWords.add(current);
|
||||
|
||||
prev = current;
|
||||
}
|
||||
}
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
if (chunkBlock != null) {
|
||||
chunkBlockList.add(chunkBlock);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
}
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
chunkBlockList.add(chunkBlock);
|
||||
|
||||
document.setParagraphs(chunkBlockList);
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline) {
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline, Table previousTable) {
|
||||
|
||||
Paragraph paragraph = new Paragraph();
|
||||
TextBlock textBlock = null;
|
||||
@ -76,19 +76,26 @@ public class SectionsBuilderService {
|
||||
AbstractTextContainer container = itty.next();
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
splitByTable = true;
|
||||
|
||||
if (previous != null && previous instanceof TextBlock && previous.getText().startsWith("Table ")) {
|
||||
((Table) container).setHeadline(previous.getText());
|
||||
if (previous != null && previous.getText().startsWith("Table ")) {
|
||||
table.setHeadline(previous.getText());
|
||||
} else {
|
||||
((Table) container).setHeadline("Table in: " + lastHeadline);
|
||||
table.setHeadline("Table in: " + lastHeadline);
|
||||
}
|
||||
// Distribute header information for subsequent tables
|
||||
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable) &&
|
||||
(previousTable.isVerticalHeader() && previousTable.getRowCount() == table.getRowCount() ||
|
||||
previousTable.getColCount() == table.getColCount())) {
|
||||
table.setHeaders(previousTable.getHeaders());
|
||||
}
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
alreadyAdded = true;
|
||||
}
|
||||
paragraph.getPageBlocks().add(container);
|
||||
paragraph.getPageBlocks().add(table);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -125,4 +132,24 @@ public class SectionsBuilderService {
|
||||
return paragraph;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean hasValidHeaderInformation(Table table) {
|
||||
|
||||
return !hasInvalidHeaderInformation(table);
|
||||
}
|
||||
|
||||
|
||||
private boolean hasInvalidHeaderInformation(Table table) {
|
||||
|
||||
if (CollectionUtils.isEmpty(table.getHeaders())) {
|
||||
return true;
|
||||
}
|
||||
if (table.getHeaders().stream().anyMatch(StringUtils::isEmpty)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -16,11 +16,17 @@ public class Cell extends Rectangle {
|
||||
|
||||
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
|
||||
public Cell(Point2D topLeft, Point2D bottomRight) {
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY()));
|
||||
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight
|
||||
.getY() - topLeft.getY()));
|
||||
}
|
||||
|
||||
|
||||
public void addTextBlock(TextBlock textBlock) {
|
||||
|
||||
textBlocks.add(textBlock);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -8,25 +8,28 @@ import org.locationtech.jts.index.strtree.STRtree;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
|
||||
|
||||
private final STRtree si = new STRtree();
|
||||
private final List<T> rectangles = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(T te) {
|
||||
|
||||
rectangles.add(te);
|
||||
si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te);
|
||||
}
|
||||
|
||||
public List<T> contains(Rectangle r) {
|
||||
List<T> intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
|
||||
|
||||
public List<T> contains(Rectangle rectangle) {
|
||||
|
||||
List<T> intersection = si.query(new Envelope(rectangle.getLeft(), rectangle.getRight(), rectangle.getTop(), rectangle
|
||||
.getBottom()));
|
||||
List<T> rv = new ArrayList<T>();
|
||||
|
||||
for (T ir: intersection) {
|
||||
if (r.contains(ir)) {
|
||||
for (T ir : intersection) {
|
||||
if (rectangle.contains(ir)) {
|
||||
rv.add(ir);
|
||||
}
|
||||
}
|
||||
@ -34,18 +37,22 @@ public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public List<T> intersects(Rectangle r) {
|
||||
|
||||
List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex
|
||||
*
|
||||
*
|
||||
* @return a Rectangle
|
||||
*/
|
||||
public Rectangle getBounds() {
|
||||
|
||||
return Rectangle.boundingBoxOf(rectangles);
|
||||
}
|
||||
|
||||
|
||||
@ -8,32 +8,45 @@ import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
@Slf4j
|
||||
public class Table extends AbstractTextContainer {
|
||||
|
||||
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
||||
|
||||
private RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private String headline;
|
||||
|
||||
@Getter
|
||||
private int rowCount = 0;
|
||||
private int rowCount;
|
||||
|
||||
@Getter
|
||||
private int colCount = 0;
|
||||
private int colCount;
|
||||
|
||||
private int rotation = 0;
|
||||
private final int rotation;
|
||||
|
||||
private List<List<Cell>> memoizedRows = null;
|
||||
private List<List<Cell>> rows;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private List<String> headers;
|
||||
|
||||
@Getter
|
||||
private boolean verticalHeader;
|
||||
|
||||
public Table(List<Cell> cells, Rectangle area, int rotation) {
|
||||
|
||||
@ -47,16 +60,87 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
}
|
||||
|
||||
|
||||
public List<List<Cell>> getRows() {
|
||||
|
||||
if (memoizedRows == null) {
|
||||
memoizedRows = computeRows();
|
||||
if (rows == null) {
|
||||
rows = computeRows();
|
||||
headers = computeHeaders();
|
||||
}
|
||||
|
||||
return memoizedRows;
|
||||
return rows;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Detect header cells (either first row or first column):
|
||||
* Column is marked as header if cell text is bold and row cell text is not bold.
|
||||
* Defaults to row.
|
||||
*/
|
||||
private List<String> computeHeaders() {
|
||||
|
||||
boolean allBold = true;
|
||||
List<Cell> rowCells = rows.get(0);
|
||||
for (Cell cell : rowCells) {
|
||||
if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks()) ||
|
||||
!cell.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
|
||||
allBold = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!allBold) {
|
||||
allBold = true;
|
||||
List<Cell> firstColCells = new ArrayList<>();
|
||||
for (List<Cell> row : rows) {
|
||||
Cell firstInRow = row.get(0);
|
||||
if (firstInRow == null || CollectionUtils.isEmpty(firstInRow.getTextBlocks()) ||
|
||||
!firstInRow.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
|
||||
allBold = false;
|
||||
break;
|
||||
}
|
||||
firstColCells.add(firstInRow);
|
||||
}
|
||||
if (allBold) {
|
||||
log.info("Headers are in first column");
|
||||
verticalHeader = true;
|
||||
return firstColCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
} else {
|
||||
log.info("Headers are defaulted in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (cell != null && CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
} else {
|
||||
log.info("Headers are in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<List<Cell>> computeRows() {
|
||||
|
||||
List<List<Cell>> rows = new ArrayList<>();
|
||||
@ -93,7 +177,8 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
}
|
||||
|
||||
public void add(Cell chunk, int row, int col) {
|
||||
|
||||
private void add(Cell chunk, int row, int col) {
|
||||
|
||||
rowCount = Math.max(rowCount, row + 1);
|
||||
colCount = Math.max(colCount, col + 1);
|
||||
@ -103,6 +188,7 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addCells(List<Cell> cells) {
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
@ -131,14 +217,9 @@ public class Table extends AbstractTextContainer {
|
||||
while (rowCells.hasNext()) {
|
||||
Cell cell = rowCells.next();
|
||||
if (i > 0) {
|
||||
List<List<Cell>> others = rowsOfCells(
|
||||
si.contains(
|
||||
new Rectangle(cell.getBottom(),
|
||||
si.getBounds().getLeft(),
|
||||
cell.getLeft() - si.getBounds().getLeft() + 1,
|
||||
si.getBounds().getBottom() - cell.getBottom()
|
||||
)
|
||||
));
|
||||
List<List<Cell>> others = rowsOfCells(si.contains(new Rectangle(cell.getBottom(), si.getBounds()
|
||||
.getLeft(), cell.getLeft() - si.getBounds().getLeft() + 1, si.getBounds().getBottom() - cell
|
||||
.getBottom())));
|
||||
|
||||
for (List<Cell> r : others) {
|
||||
jumpToColumn = Math.max(jumpToColumn, r.size());
|
||||
@ -158,7 +239,9 @@ public class Table extends AbstractTextContainer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static List<List<Cell>> rowsOfCells(List<Cell> cells) {
|
||||
|
||||
Cell c;
|
||||
float lastTop;
|
||||
List<List<Cell>> rv = new ArrayList<>();
|
||||
@ -168,19 +251,10 @@ public class Table extends AbstractTextContainer {
|
||||
return rv;
|
||||
}
|
||||
|
||||
Collections.sort(cells, new Comparator<Cell>() {
|
||||
@Override
|
||||
public int compare(Cell arg0, Cell arg1) {
|
||||
return Double.compare(arg0.getLeft(), arg1.getLeft());
|
||||
}
|
||||
});
|
||||
cells.sort(Comparator.comparingDouble(Rectangle::getLeft));
|
||||
|
||||
Collections.sort(cells, Collections.reverseOrder(new Comparator<Cell>() {
|
||||
@Override
|
||||
public int compare(Cell arg0, Cell arg1) {
|
||||
return Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(),2));
|
||||
}
|
||||
}));
|
||||
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1
|
||||
.getBottom(), 2))));
|
||||
|
||||
Iterator<Cell> iter = cells.iterator();
|
||||
c = iter.next();
|
||||
@ -201,6 +275,7 @@ public class Table extends AbstractTextContainer {
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
|
||||
@ -237,6 +312,7 @@ public class Table extends AbstractTextContainer {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
public String getTextAsHtml() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
@ -270,22 +346,30 @@ public class Table extends AbstractTextContainer {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
class CellPosition implements Comparable<CellPosition> {
|
||||
|
||||
static class CellPosition implements Comparable<CellPosition> {
|
||||
|
||||
CellPosition(int row, int col) {
|
||||
|
||||
this.row = row;
|
||||
this.col = col;
|
||||
}
|
||||
|
||||
final int row, col;
|
||||
|
||||
final int row;
|
||||
final int col;
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return row + 101 * col;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
@ -299,10 +383,12 @@ public class Table extends AbstractTextContainer {
|
||||
return row == other.row && col == other.col;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(CellPosition other) {
|
||||
int rowdiff = row - other.row;
|
||||
return rowdiff != 0 ? rowdiff : col - other.col;
|
||||
|
||||
int rowDiff = row - other.row;
|
||||
return rowDiff != 0 ? rowDiff : col - other.col;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
|
||||
@Before
|
||||
public void stubRulesClient() {
|
||||
public void stubClients() {
|
||||
|
||||
when(rulesClient.getVersion()).thenReturn(0L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(RULES));
|
||||
@ -241,6 +241,27 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
@ -1,36 +1,91 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.DefaultColor;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest
|
||||
public class EntityRedactionServiceTest {
|
||||
|
||||
@MockBean
|
||||
private KieContainer kieContainer;
|
||||
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
|
||||
private static final String NAME_CODE = "name";
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
|
||||
@MockBean
|
||||
private DroolsExecutionService droolsExecutionService;
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@MockBean
|
||||
private DictionaryService dictionaryService;
|
||||
private RulesClient rulesClient;
|
||||
|
||||
@Autowired
|
||||
private EntityRedactionService entityRedactionService;
|
||||
|
||||
@Autowired
|
||||
private PdfSegmentationService pdfSegmentationService;
|
||||
|
||||
@TestConfiguration
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(DEFAULT_RULES.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNestedEntitiesRemoval() {
|
||||
@ -47,4 +102,74 @@ public class EntityRedactionServiceTest {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
"\n" +
|
||||
"global Section section\n" +
|
||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
" when\n" +
|
||||
" Section(tabularData != null && tabularData.size() > 0\n" +
|
||||
" && tabularData.containsKey(\"Vertebrate study Y/N\")\n" +
|
||||
" && tabularData.get(\"Vertebrate study Y/N\").equals(\"Y\")\n" +
|
||||
" )\n" +
|
||||
" then\n" +
|
||||
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9);\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(1L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
TypeResponse typeResponse = TypeResponse.builder()
|
||||
.types(Arrays.asList(
|
||||
TypeResult.builder().type(NAME_CODE).color(new float[]{1, 1, 0}).build(),
|
||||
TypeResult.builder().type(ADDRESS_CODE).color(new float[]{0, 1, 1}).build()))
|
||||
.build();
|
||||
when(dictionaryClient.getAllTypes()).thenReturn(typeResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDefaultColor()).thenReturn(new DefaultColor());
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(path);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: drools/rules.drl");
|
||||
}
|
||||
try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String str;
|
||||
while ((str = br.readLine()) != null) {
|
||||
sb.append(str).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + path, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -99,3 +99,14 @@ rule "8: Redact contact information, if Producer is found"
|
||||
section.redactBetween("No:", "Fax", "address", 8, "Producer was found");
|
||||
end
|
||||
|
||||
// Fires for a Section whose tabularData is non-empty and maps "Vertebrate study Y/N" to "Y".
// It then calls section.redact for the "name" and "address" types and section.highlightCell
// for the "Vertebrate study Y/N" entry, all tagged with rule number 9.
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
    when
        Section(tabularData != null && tabularData.size() > 0
            && tabularData.containsKey("Vertebrate study Y/N")
            && tabularData.get("Vertebrate study Y/N").equals("Y")
        )
    then
        section.redact("name", 9, "Redacted because row is a vertebrate study");
        section.redact("address", 9, "Redacted because row is a vertebrate study");
        section.highlightCell("Vertebrate study Y/N", 9);
    end
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user