Pull request #25: RED-101 & RED-102: Make table structure detection more robust
Merge in RED/redaction-service from bugfix/RED-101 to master

* commit '76369f13f8d41154f3dd3690af81b9567ca1133e':
  Remove redundant comment
  RED-101: Add features as requested in PR
  Add unit test for table structure requirements
  Make table structure detection more robust
This commit is contained in:
commit
8c08bb3664
@ -3,6 +3,8 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
@ -16,6 +18,7 @@ public class Entity {
|
||||
private boolean redaction;
|
||||
private String redactionReason;
|
||||
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
|
||||
private List<TextPositionSequence> targetSequences;
|
||||
private Integer start;
|
||||
private Integer end;
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -25,9 +26,16 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("checkstyle:ModifiedControlVariable")
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive) {
|
||||
|
||||
return getSequences(searchString, caseInsensitive, null);
|
||||
|
||||
}
|
||||
|
||||
@SuppressWarnings("checkstyle:ModifiedControlVariable")
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive,
|
||||
List<TextPositionSequence> sequencesSubList) {
|
||||
|
||||
String normalizedSearchString;
|
||||
if (caseInsensitive) {
|
||||
normalizedSearchString = searchString.toLowerCase();
|
||||
@ -40,37 +48,50 @@ public class SearchableText {
|
||||
|
||||
List<TextPositionSequence> crossSequenceParts = new ArrayList<>();
|
||||
List<EntityPositionSequence> finalMatches = new ArrayList<>();
|
||||
for (int i = 0; i < sequences.size(); i++) {
|
||||
TextPositionSequence partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
for (int j = 0; j < sequences.get(i).length(); j++) {
|
||||
|
||||
if (i > 0 && j == 0 && sequences.get(i).charAt(0, caseInsensitive) == ' ' && sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && sequences.get(i)
|
||||
.charAt(j, caseInsensitive) == ' ' && sequences.get(i).charAt(j - 1, caseInsensitive) == ' ') {
|
||||
if (j == sequences.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
|
||||
List<TextPositionSequence> searchSpace;
|
||||
if (sequencesSubList != null) {
|
||||
int subListIndex = Collections.indexOfSubList(sequences, sequencesSubList);
|
||||
if (subListIndex != -1) {
|
||||
searchSpace = sequences.subList(subListIndex, subListIndex + sequencesSubList.size());
|
||||
} else {
|
||||
searchSpace = sequences;
|
||||
}
|
||||
} else {
|
||||
searchSpace = sequences;
|
||||
}
|
||||
|
||||
for (int i = 0; i < searchSpace.size(); i++) {
|
||||
TextPositionSequence partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
|
||||
for (int j = 0; j < searchSpace.get(i).length(); j++) {
|
||||
|
||||
if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i).charAt(j - 1, caseInsensitive) == ' ') {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (j == 0 && sequences.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1)
|
||||
if (j == 0 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (sequences.get(i)
|
||||
.charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && sequences.get(i)
|
||||
if (searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == '-') {
|
||||
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1)
|
||||
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && sequences.get(i - 1)
|
||||
.charAt(sequences.get(i - 1).length() - 1, caseInsensitive) != ' ' && sequences.get(i)
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(searchSpace.get(i)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && isSeparator(searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ') {
|
||||
partMatch.add(sequences.get(i).textPositionAt(j));
|
||||
if (!(j == sequences.get(i).length() - 1 && sequences.get(i)
|
||||
partMatch.add(searchSpace.get(i).textPositionAt(j));
|
||||
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
|
||||
counter++;
|
||||
}
|
||||
@ -79,19 +100,19 @@ public class SearchableText {
|
||||
if (counter == searchString.length()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
|
||||
if (i == sequences.size() - 1 && j == sequences.get(i).length() - 1 || j != sequences.get(i)
|
||||
.length() - 1 && isSeparator(sequences.get(i)
|
||||
.charAt(j + 1, caseInsensitive)) || j == sequences.get(i)
|
||||
.length() - 1 && isSeparator(sequences.get(i + 1)
|
||||
.charAt(0, caseInsensitive)) || j == sequences.get(i).length() - 1 && sequences.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ' && sequences.get(i + 1)
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
|
||||
.length() - 1 && isSeparator(searchSpace.get(i)
|
||||
.charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && isSeparator(searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive)) || j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive) != ' ') {
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts));
|
||||
}
|
||||
|
||||
counter = 0;
|
||||
crossSequenceParts = new ArrayList<>();
|
||||
partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
|
||||
}
|
||||
} else {
|
||||
counter = 0;
|
||||
@ -99,16 +120,17 @@ public class SearchableText {
|
||||
j--;
|
||||
}
|
||||
crossSequenceParts = new ArrayList<>();
|
||||
partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
|
||||
}
|
||||
|
||||
if (j == sequences.get(i).length() - 1 && counter != 0) {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return finalMatches;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -7,9 +7,10 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -31,7 +32,21 @@ public class Section {
|
||||
|
||||
private int sectionNumber;
|
||||
|
||||
private Map<String, String> tabularData;
|
||||
private Map<String, TextBlock> tabularData;
|
||||
|
||||
|
||||
public boolean isVertebrateStudy() {
|
||||
return tabularData != null
|
||||
&& tabularData.containsKey("Vertebrate study Y/N")
|
||||
&& tabularData.get("Vertebrate study Y/N").getText().equals("Y");
|
||||
}
|
||||
|
||||
|
||||
public boolean isNotVertebrateStudy() {
|
||||
return tabularData != null
|
||||
&& tabularData.containsKey("Vertebrate study Y/N")
|
||||
&& tabularData.get("Vertebrate study Y/N").getText().equals("N");
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(String type) {
|
||||
@ -163,20 +178,16 @@ public class Section {
|
||||
|
||||
public void highlightCell(String cellHeader, int ruleNumber) {
|
||||
|
||||
String value = tabularData.get(cellHeader);
|
||||
TextBlock value = tabularData.get(cellHeader);
|
||||
if (value == null) {
|
||||
log.warn("Could not find any data for {}.", cellHeader);
|
||||
} else {
|
||||
Set<Entity> found = findEntities(value, "must_redact");
|
||||
if (CollectionUtils.isEmpty(found)) {
|
||||
log.warn("Could not identify value {} in row.", value);
|
||||
} else {
|
||||
Entity entity = found.iterator().next();
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(cellHeader);
|
||||
entities.add(entity);
|
||||
}
|
||||
Entity entity = new Entity(value.getText(), "must_redact", 0, value.getText().length(), headline, sectionNumber);
|
||||
entity.setRedaction(false);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(cellHeader);
|
||||
entity.setTargetSequences(value.getSequences()); // Make sure no other cells with same content are highlighted
|
||||
entities.add(entity);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -51,24 +51,27 @@ public class EntityRedactionService {
|
||||
List<Table> tables = paragraph.getTables();
|
||||
|
||||
for (Table table : tables) {
|
||||
List<String> metadata = table.getHeaders();
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
List<String> cellValues = new ArrayList<>();
|
||||
Map<String, TextBlock> tabularData = new HashMap<>();
|
||||
for (Cell cell : row) {
|
||||
if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
cellValues.add(null);
|
||||
if (cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
continue;
|
||||
}
|
||||
cellValues.add(cell.getTextBlocks().get(0).getText());
|
||||
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber);
|
||||
cell.getHeaderCells().forEach(headerCell -> {
|
||||
String headerName = headerCell.getTextBlocks().get(0).getText()
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
tabularData.put(headerName, cell.getTextBlocks().get(0));
|
||||
});
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber);
|
||||
|
||||
Map<String, String> tabularData = toMap(metadata, cellValues);
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
@ -116,35 +119,15 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Map<String, String> toMap(List<String> keys, List<String> values) {
|
||||
|
||||
if (keys.size() != values.size()) {
|
||||
log.warn("Cannot merge lists of unequal size, returning empty map.");
|
||||
return new HashMap<>();
|
||||
}
|
||||
Map<String, String> result = new HashMap<>();
|
||||
for (int i = 0; i < keys.size(); i++) {
|
||||
String value = values.get(i);
|
||||
if (value == null) {
|
||||
continue;
|
||||
}
|
||||
result.put(keys.get(i), value);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> clearAndFindPositions(Set<Entity> entities, SearchableText text) {
|
||||
|
||||
removeEntitiesContainedInLarger(entities);
|
||||
|
||||
for (Entity entity : entities) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), true));
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), true, entity.getTargetSequences()));
|
||||
} else {
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), false));
|
||||
entity.setPositionSequences(text.getSequences(entity.getWord(), false, entity.getTargetSequences()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
@ -85,10 +86,20 @@ public class SectionsBuilderService {
|
||||
table.setHeadline("Table in: " + lastHeadline);
|
||||
}
|
||||
// Distribute header information for subsequent tables
|
||||
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable) &&
|
||||
(previousTable.isVerticalHeader() && previousTable.getRowCount() == table.getRowCount() ||
|
||||
previousTable.getColCount() == table.getColCount())) {
|
||||
table.setHeaders(previousTable.getHeaders());
|
||||
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
|
||||
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
|
||||
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(table);
|
||||
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
||||
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||
List<Cell> row = table.getRows().get(i);
|
||||
if (row.size() == previousTableNonHeaderRow.size()
|
||||
&& row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
|
||||
for (int j = 0; j < row.size(); j++) {
|
||||
row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
@ -141,14 +152,32 @@ public class SectionsBuilderService {
|
||||
|
||||
private boolean hasInvalidHeaderInformation(Table table) {
|
||||
|
||||
if (CollectionUtils.isEmpty(table.getHeaders())) {
|
||||
return true;
|
||||
}
|
||||
if (table.getHeaders().stream().anyMatch(StringUtils::isEmpty)) {
|
||||
return true;
|
||||
return table.getRows().stream()
|
||||
.flatMap(row -> row.stream()
|
||||
.filter(cell -> CollectionUtils.isNotEmpty(cell.getHeaderCells())))
|
||||
.findAny()
|
||||
.isEmpty();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<Cell> getRowWithNonHeaderCells(Table table) {
|
||||
|
||||
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||
List<Cell> row = table.getRows().get(i);
|
||||
boolean allNonHeader = true;
|
||||
for (Cell cell : row) {
|
||||
if (cell.isHeaderCell()) {
|
||||
allNonHeader = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allNonHeader) {
|
||||
return row;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return Collections.emptyList();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -16,10 +16,14 @@ public class Cell extends Rectangle {
|
||||
|
||||
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
private List<Cell> headerCells = new ArrayList<>();
|
||||
|
||||
private boolean isHeaderCell;
|
||||
|
||||
public Cell(Point2D topLeft, Point2D bottomRight) {
|
||||
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()),
|
||||
(float) (bottomRight
|
||||
.getY() - topLeft.getY()));
|
||||
}
|
||||
|
||||
@ -29,4 +33,4 @@ public class Cell extends Rectangle {
|
||||
textBlocks.add(textBlock);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
@RequiredArgsConstructor
|
||||
public class CellPosition implements Comparable<CellPosition> {
|
||||
|
||||
int row;
|
||||
|
||||
int col;
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(CellPosition other) {
|
||||
|
||||
int rowDiff = row - other.row;
|
||||
return rowDiff != 0 ? rowDiff : col - other.col;
|
||||
}
|
||||
|
||||
}
|
||||
@ -8,12 +8,10 @@ import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
@ -41,12 +39,6 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
private List<List<Cell>> rows;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private List<String> headers;
|
||||
|
||||
@Getter
|
||||
private boolean verticalHeader;
|
||||
|
||||
public Table(List<Cell> cells, Rectangle area, int rotation) {
|
||||
|
||||
@ -65,7 +57,7 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
if (rows == null) {
|
||||
rows = computeRows();
|
||||
headers = computeHeaders();
|
||||
computeHeaders();
|
||||
}
|
||||
|
||||
return rows;
|
||||
@ -78,72 +70,105 @@ public class Table extends AbstractTextContainer {
|
||||
* Column is marked as header if cell text is bold and row cell text is not bold.
|
||||
* Defaults to row.
|
||||
*/
|
||||
private List<String> computeHeaders() {
|
||||
private void computeHeaders() {
|
||||
|
||||
boolean allBold = true;
|
||||
if (rows.isEmpty()) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<Cell> rowCells = rows.get(0);
|
||||
for (Cell cell : rowCells) {
|
||||
if (cell == null || CollectionUtils.isEmpty(cell.getTextBlocks()) ||
|
||||
!cell.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
|
||||
allBold = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!allBold) {
|
||||
allBold = true;
|
||||
List<Cell> firstColCells = new ArrayList<>();
|
||||
for (List<Cell> row : rows) {
|
||||
Cell firstInRow = row.get(0);
|
||||
if (firstInRow == null || CollectionUtils.isEmpty(firstInRow.getTextBlocks()) ||
|
||||
!firstInRow.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
|
||||
allBold = false;
|
||||
// A bold cell is a header cell as long as every cell to the left/top is bold, too
|
||||
cells.forEach((position, cell) -> {
|
||||
List<Cell> cellsToTheLeft = getCellsToTheLeft(position);
|
||||
Cell lastHeaderCell = null;
|
||||
for (Cell leftCell : cellsToTheLeft) {
|
||||
if (CollectionUtils.isNotEmpty(leftCell.getTextBlocks()) && leftCell.getTextBlocks()
|
||||
.get(0)
|
||||
.getMostPopularWordStyle()
|
||||
.equals("bold")) {
|
||||
lastHeaderCell = leftCell;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
firstColCells.add(firstInRow);
|
||||
}
|
||||
if (allBold) {
|
||||
log.info("Headers are in first column");
|
||||
verticalHeader = true;
|
||||
return firstColCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
} else {
|
||||
log.info("Headers are defaulted in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (cell != null && CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
if (lastHeaderCell != null) {
|
||||
cell.getHeaderCells().add(lastHeaderCell);
|
||||
}
|
||||
} else {
|
||||
log.info("Headers are in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
lastHeaderCell = null;
|
||||
List<Cell> cellsToTheTop = getCellToTheTop(position);
|
||||
for (Cell topCell : cellsToTheTop) {
|
||||
if (CollectionUtils.isNotEmpty(topCell.getTextBlocks()) && topCell.getTextBlocks()
|
||||
.get(0)
|
||||
.getMostPopularWordStyle()
|
||||
.equals("bold")) {
|
||||
lastHeaderCell = topCell;
|
||||
} else {
|
||||
return null;
|
||||
break;
|
||||
}
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
if (lastHeaderCell != null) {
|
||||
cell.getHeaderCells().add(lastHeaderCell);
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks()) && cell.getTextBlocks()
|
||||
.get(0)
|
||||
.getMostPopularWordStyle()
|
||||
.equals("bold")) {
|
||||
cell.setHeaderCell(true);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<Cell> getCellsToTheLeft(CellPosition cellPosition) {
|
||||
|
||||
List<Cell> result = new ArrayList<>();
|
||||
if (cellPosition.getCol() == 0) {
|
||||
return result;
|
||||
}
|
||||
int row = cellPosition.getRow();
|
||||
for (int i = cellPosition.getCol() - 1; i >= 0; i--) {
|
||||
if (cells.get(new CellPosition(row, i)) != null) {
|
||||
result.add(cells.get(new CellPosition(row, i)));
|
||||
} else {
|
||||
Cell spanningCell = null;
|
||||
while (spanningCell == null && row >= 0) {
|
||||
row--;
|
||||
spanningCell = cells.get(new CellPosition(row, i));
|
||||
}
|
||||
if (spanningCell != null) {
|
||||
result.add(spanningCell);
|
||||
}
|
||||
row = cellPosition.getRow();
|
||||
}
|
||||
}
|
||||
Collections.reverse(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private List<Cell> getCellToTheTop(CellPosition cellPosition) {
|
||||
|
||||
List<Cell> result = new ArrayList<>();
|
||||
if (cellPosition.getRow() == 0) {
|
||||
return result;
|
||||
}
|
||||
int col = cellPosition.getCol();
|
||||
for (int i = cellPosition.getRow() - 1; i >= 0; i--) {
|
||||
if (cells.get(new CellPosition(i, col)) != null) {
|
||||
result.add(cells.get(new CellPosition(i, col)));
|
||||
} else {
|
||||
Cell spanningCell = null;
|
||||
while (spanningCell == null && col >= 0) {
|
||||
col--;
|
||||
spanningCell = cells.get(new CellPosition(i, col));
|
||||
}
|
||||
if (spanningCell != null) {
|
||||
result.add(spanningCell);
|
||||
}
|
||||
col = cellPosition.getCol();
|
||||
}
|
||||
}
|
||||
Collections.reverse(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private List<List<Cell>> computeRows() {
|
||||
|
||||
List<List<Cell>> rows = new ArrayList<>();
|
||||
@ -152,7 +177,9 @@ public class Table extends AbstractTextContainer {
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = rowCount - 1; j >= 0; j--) { // cols
|
||||
Cell cell = cells.get(new CellPosition(j, i));
|
||||
lastRow.add(cell);
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
@ -161,7 +188,9 @@ public class Table extends AbstractTextContainer {
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < rowCount; j++) { // cols
|
||||
Cell cell = cells.get(new CellPosition(i, j));
|
||||
lastRow.add(cell);
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
@ -170,7 +199,9 @@ public class Table extends AbstractTextContainer {
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < colCount; j++) {
|
||||
Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
|
||||
lastRow.add(cell);
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
@ -220,20 +251,21 @@ public class Table extends AbstractTextContainer {
|
||||
while (rowCells.hasNext()) {
|
||||
Cell cell = rowCells.next();
|
||||
if (i > 0) {
|
||||
List<List<Cell>> others = rowsOfCells(si.contains(new Rectangle(cell.getBottom(), si.getBounds()
|
||||
.getLeft(), cell.getLeft() - si.getBounds().getLeft() + 1, si.getBounds().getBottom() - cell
|
||||
.getBottom())));
|
||||
Rectangle rectangle = new Rectangle(cell.getBottom(),
|
||||
si.getBounds().getLeft(),
|
||||
cell.getLeft() - si.getBounds().getLeft() + 1,
|
||||
si.getBounds().getBottom() - cell.getBottom());
|
||||
List<List<Cell>> others = rowsOfCells(si.contains(rectangle));
|
||||
|
||||
for (List<Cell> r : others) {
|
||||
jumpToColumn = Math.max(jumpToColumn, r.size());
|
||||
}
|
||||
}
|
||||
|
||||
while (startColumn != jumpToColumn) {
|
||||
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
|
||||
startColumn++;
|
||||
while (startColumn != jumpToColumn) {
|
||||
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
|
||||
startColumn++;
|
||||
}
|
||||
}
|
||||
|
||||
add(cell, i, startColumn);
|
||||
previousNonNullCellForColumnIndex.put(startColumn, cell);
|
||||
startColumn++;
|
||||
@ -243,26 +275,23 @@ public class Table extends AbstractTextContainer {
|
||||
}
|
||||
|
||||
|
||||
private static List<List<Cell>> rowsOfCells(List<Cell> cells) {
|
||||
private List<List<Cell>> rowsOfCells(List<Cell> cells) {
|
||||
|
||||
Cell c;
|
||||
float lastTop;
|
||||
List<List<Cell>> rv = new ArrayList<>();
|
||||
List<Cell> lastRow;
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
cells.sort(Comparator.comparingDouble(Rectangle::getLeft));
|
||||
|
||||
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1
|
||||
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2),
|
||||
Utils.round(arg1
|
||||
.getBottom(), 2))));
|
||||
|
||||
Iterator<Cell> iter = cells.iterator();
|
||||
c = iter.next();
|
||||
lastTop = c.getBottom();
|
||||
lastRow = new ArrayList<>();
|
||||
Cell c = iter.next();
|
||||
float lastTop = c.getBottom();
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
lastRow.add(c);
|
||||
rv.add(lastRow);
|
||||
|
||||
@ -349,51 +378,4 @@ public class Table extends AbstractTextContainer {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
static class CellPosition implements Comparable<CellPosition> {
|
||||
|
||||
CellPosition(int row, int col) {
|
||||
|
||||
this.row = row;
|
||||
this.col = col;
|
||||
}
|
||||
|
||||
|
||||
final int row;
|
||||
final int col;
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return row + 101 * col;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
CellPosition other = (CellPosition) obj;
|
||||
return row == other.row && col == other.col;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(CellPosition other) {
|
||||
|
||||
int rowDiff = row - other.row;
|
||||
return rowDiff != 0 ? rowDiff : col - other.col;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -25,26 +24,28 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class TableExtractionService {
|
||||
|
||||
public void extractTables(CleanRulings cleanRulings, Page page){
|
||||
public void extractTables(CleanRulings cleanRulings, Page page) {
|
||||
|
||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
Iterator<AbstractTextContainer> itty = page.getTextBlocks().iterator();
|
||||
while (itty.hasNext()) {
|
||||
TextBlock textBlock = (TextBlock) itty.next();
|
||||
for (AbstractTextContainer abstractTextContainer : page.getTextBlocks()) {
|
||||
TextBlock textBlock = (TextBlock) abstractTextContainer;
|
||||
for (Cell cell : cells) {
|
||||
if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight())) {
|
||||
if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(),
|
||||
textBlock.getHeight())) {
|
||||
cell.addTextBlock(textBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells)
|
||||
.stream()
|
||||
cells = new ArrayList<>(new HashSet<>(cells));
|
||||
Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER);
|
||||
|
||||
|
||||
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells).stream()
|
||||
.filter(r -> r.getWidth() > 0f && r.getHeight() > 0f)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@ -63,9 +64,9 @@ public class TableExtractionService {
|
||||
for (Table table : tables) {
|
||||
int position = -1;
|
||||
|
||||
itty = page.getTextBlocks().iterator();
|
||||
Iterator<AbstractTextContainer> itty = page.getTextBlocks().iterator();
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer textBlock = (AbstractTextContainer) itty.next();
|
||||
AbstractTextContainer textBlock = itty.next();
|
||||
if (table.contains(textBlock)) {
|
||||
if (position == -1) {
|
||||
position = page.getTextBlocks().indexOf(textBlock);
|
||||
@ -79,17 +80,18 @@ public class TableExtractionService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
List<Cell> cellsFound = new ArrayList<>();
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines,
|
||||
verticalRulingLines);
|
||||
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
|
||||
Collections.sort(intersectionPointsList, POINT_COMPARATOR);
|
||||
boolean doBreak;
|
||||
intersectionPointsList.sort(POINT_COMPARATOR);
|
||||
|
||||
for (int i = 0; i < intersectionPointsList.size(); i++) {
|
||||
Point2D topLeft = intersectionPointsList.get(i);
|
||||
Ruling[] hv = intersectionPoints.get(topLeft);
|
||||
doBreak = false;
|
||||
|
||||
// CrossingPointsDirectlyBelow( topLeft );
|
||||
List<Point2D> xPoints = new ArrayList<>();
|
||||
@ -106,10 +108,6 @@ public class TableExtractionService {
|
||||
}
|
||||
outer:
|
||||
for (Point2D xPoint : xPoints) {
|
||||
if (doBreak) {
|
||||
break;
|
||||
}
|
||||
|
||||
// is there a vertical edge b/w topLeft and xPoint?
|
||||
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
|
||||
continue;
|
||||
@ -120,11 +118,9 @@ public class TableExtractionService {
|
||||
continue;
|
||||
}
|
||||
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
|
||||
if (intersectionPoints.containsKey(btmRight)
|
||||
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
|
||||
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
if (intersectionPoints.containsKey(btmRight) && intersectionPoints.get(btmRight)[0].equals(intersectionPoints
|
||||
.get(xPoint)[0]) && intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
cellsFound.add(new Cell(topLeft, btmRight));
|
||||
doBreak = true;
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
@ -139,7 +135,7 @@ public class TableExtractionService {
|
||||
}
|
||||
|
||||
|
||||
public List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||
private List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
||||
List<Rectangle> rectangles = new ArrayList<>();
|
||||
Set<Point2D> pointSet = new HashSet<>();
|
||||
@ -147,10 +143,6 @@ public class TableExtractionService {
|
||||
Map<Point2D, Point2D> edgesV = new HashMap<>();
|
||||
int i = 0;
|
||||
|
||||
cells = new ArrayList<>(new HashSet<>(cells));
|
||||
|
||||
Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER);
|
||||
|
||||
for (Rectangle cell : cells) {
|
||||
for (Point2D pt : cell.getPoints()) {
|
||||
if (pointSet.contains(pt)) { // shared vertex, remove it
|
||||
@ -163,10 +155,10 @@ public class TableExtractionService {
|
||||
|
||||
// X first sort
|
||||
List<Point2D> pointsSortX = new ArrayList<>(pointSet);
|
||||
Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR);
|
||||
pointsSortX.sort(X_FIRST_POINT_COMPARATOR);
|
||||
// Y first sort
|
||||
List<Point2D> pointsSortY = new ArrayList<>(pointSet);
|
||||
Collections.sort(pointsSortY, POINT_COMPARATOR);
|
||||
pointsSortY.sort(POINT_COMPARATOR);
|
||||
|
||||
while (i < pointSet.size()) {
|
||||
float currY = (float) pointsSortY.get(i).getY();
|
||||
@ -203,13 +195,12 @@ public class TableExtractionService {
|
||||
nextVertex = edgesV.get(curr.point);
|
||||
edgesV.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL);
|
||||
polygon.add(lastAddedVertex);
|
||||
} else {
|
||||
nextVertex = edgesH.get(curr.point);
|
||||
edgesH.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL);
|
||||
polygon.add(lastAddedVertex);
|
||||
}
|
||||
polygon.add(lastAddedVertex);
|
||||
|
||||
if (lastAddedVertex.equals(polygon.get(0))) {
|
||||
// closed polygon
|
||||
@ -227,10 +218,10 @@ public class TableExtractionService {
|
||||
|
||||
// calculate grid-aligned minimum area rectangles for each found polygon
|
||||
for (List<PolygonVertex> poly : polygons) {
|
||||
float top = java.lang.Float.MAX_VALUE;
|
||||
float left = java.lang.Float.MAX_VALUE;
|
||||
float bottom = java.lang.Float.MIN_VALUE;
|
||||
float right = java.lang.Float.MIN_VALUE;
|
||||
float top = Float.MAX_VALUE;
|
||||
float left = Float.MAX_VALUE;
|
||||
float bottom = Float.MIN_VALUE;
|
||||
float right = Float.MIN_VALUE;
|
||||
for (PolygonVertex pt : poly) {
|
||||
top = (float) Math.min(top, pt.point.getY());
|
||||
left = (float) Math.min(left, pt.point.getX());
|
||||
@ -244,69 +235,66 @@ public class TableExtractionService {
|
||||
}
|
||||
|
||||
|
||||
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D arg0, Point2D arg1) {
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
|
||||
|
||||
if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
} else if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
|
||||
if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
} else if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
|
||||
|
||||
private static final Comparator<Point2D> POINT_COMPARATOR = new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D arg0, Point2D arg1) {
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
|
||||
|
||||
if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
} else if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
} else if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
};
|
||||
|
||||
|
||||
private enum Direction {
|
||||
HORIZONTAL,
|
||||
VERTICAL
|
||||
HORIZONTAL, VERTICAL
|
||||
}
|
||||
|
||||
static class PolygonVertex {
|
||||
|
||||
Point2D point;
|
||||
Direction direction;
|
||||
|
||||
public PolygonVertex(Point2D point, Direction direction) {
|
||||
|
||||
PolygonVertex(Point2D point, Direction direction) {
|
||||
|
||||
this.direction = direction;
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
@ -316,15 +304,21 @@ public class TableExtractionService {
|
||||
return this.point.equals(((PolygonVertex) other).point);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return this.point.hashCode();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), this.direction.toString());
|
||||
|
||||
return String.format("%s[point=%s,direction=%s]", this.getClass()
|
||||
.getName(), this.point.toString(), this.direction.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -227,6 +227,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
|
||||
|
||||
System.out.println("noExceptionShouldBeThrownForAnyFiles");
|
||||
ClassLoader loader = getClass().getClassLoader();
|
||||
URL url = loader.getResource("files");
|
||||
File[] files = new File(url.getPath()).listFiles();
|
||||
@ -266,6 +267,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/96 Trinexapac-ethyl_RAR_09_Volume_3CA_B-7_2018-02-23.pdf");
|
||||
|
||||
@ -289,8 +291,9 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
System.out.println("testTableRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
@ -311,6 +314,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void testManualRedaction() throws IOException {
|
||||
|
||||
System.out.println("testManualRedaction");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
|
||||
@ -345,6 +349,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
|
||||
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
@ -363,6 +368,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void sectionsTest() throws IOException {
|
||||
|
||||
System.out.println("sectionsTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
|
||||
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
@ -381,6 +387,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTablesTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " +
|
||||
"Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
@ -399,6 +406,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void htmlTableRotationTest() throws IOException {
|
||||
|
||||
System.out.println("htmlTableRotationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S" +
|
||||
"-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
|
||||
@ -10,13 +10,16 @@ import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
@ -45,14 +48,15 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest
|
||||
@RunWith(SpringRunner.class)
|
||||
public class EntityRedactionServiceTest {
|
||||
|
||||
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
|
||||
private static final String NAME_CODE = "name";
|
||||
private static final String ADDRESS_CODE = "address";
|
||||
|
||||
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
|
||||
@MockBean
|
||||
private DictionaryClient dictionaryClient;
|
||||
|
||||
@ -112,6 +116,111 @@ public class EntityRedactionServiceTest {
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTrueNegativesInTable() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
|
||||
" Supplement - Identity of the active substance - Reference list.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
|
||||
}
|
||||
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
|
||||
"the plant protection product.pdf");
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFalsePositiveInWrongCell() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/names.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/addresses.txt")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // two pages
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 9)
|
||||
.count()).isEqualTo(10);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void headerPropagation() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Bissig R.", "Thanei P."))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
|
||||
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(4);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Before
|
||||
public void stubRedaction() {
|
||||
String tableRules = "package drools\n" +
|
||||
"\n" +
|
||||
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
|
||||
@ -119,10 +228,7 @@ public class EntityRedactionServiceTest {
|
||||
"global Section section\n" +
|
||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
" when\n" +
|
||||
" Section(tabularData != null && tabularData.size() > 0\n" +
|
||||
" && tabularData.containsKey(\"Vertebrate study Y/N\")\n" +
|
||||
" && tabularData.get(\"Vertebrate study Y/N\").equals(\"Y\")\n" +
|
||||
" )\n" +
|
||||
" Section(isVertebrateStudy())\n" +
|
||||
" then\n" +
|
||||
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
|
||||
@ -135,22 +241,9 @@ public class EntityRedactionServiceTest {
|
||||
TypeResult.builder().type(NAME_CODE).color(new float[]{1, 1, 0}).build(),
|
||||
TypeResult.builder().type(ADDRESS_CODE).color(new float[]{0, 1, 1}).build()))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes()).thenReturn(typeResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDefaultColor()).thenReturn(new DefaultColor());
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc, null);
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 4 out of 5 entities recognized on page 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||
|
||||
@SpringBootTest
|
||||
@RunWith(SpringRunner.class)
|
||||
public class PdfSegmentationServiceTest {
|
||||
|
||||
@Autowired
|
||||
private PdfSegmentationService pdfSegmentationService;
|
||||
|
||||
@Autowired
|
||||
private RulingCleaningService rulingCleaningService;
|
||||
|
||||
@Autowired
|
||||
private TableExtractionService tableExtractionService;
|
||||
|
||||
@Autowired
|
||||
private BlockificationService blockificationService;
|
||||
|
||||
@MockBean
|
||||
private KieContainer kieContainer;
|
||||
|
||||
|
||||
@Test
|
||||
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document document = pdfSegmentationService.parseDocument(pdDocument);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
.collect(Collectors.toList())).isNotEmpty();
|
||||
Table table = document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
.collect(Collectors.toList())
|
||||
.get(0);
|
||||
assertThat(table.getColCount()).isEqualTo(6);
|
||||
assertThat(table.getRowCount()).isEqualTo(13);
|
||||
assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -49,64 +49,69 @@ rule "5: Do not redact in guideline sections"
|
||||
section.redactNot("address", 5, "Section is a guideline section.");
|
||||
end
|
||||
|
||||
rule "6: Redact if must redact entry is found"
|
||||
when
|
||||
eval(section.contains("must_redact")==true);
|
||||
then
|
||||
section.redact("name", 6, "must_redact entry was found.");
|
||||
section.redact("address", 6, "must_redact entry was found.");
|
||||
end
|
||||
|
||||
|
||||
rule "7: Redact contact information, if applicant is found"
|
||||
rule "6: Redact contact information, if applicant is found"
|
||||
when
|
||||
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant"));
|
||||
then
|
||||
section.redactLineAfter("Name:", "address", 7, "Applicant information was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Contact point:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Phone:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Fax:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Tel.:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Tel:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("E-mail:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Email:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Contact:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Telephone number:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Fax number:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Telephone:", "address", 7, "Applicant information was found");
|
||||
section.redactBetween("No:", "Fax", "address", 7, "Applicant information was found");
|
||||
section.redactBetween("Contact:", "Tel.:", "address", 7, "Applicant information was found");
|
||||
section.redactLineAfter("Name:", "address", 6, "Applicant information was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Contact point:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Phone:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Fax:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Tel.:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Tel:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("E-mail:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Email:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Contact:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Telephone number:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Fax number:", "address", 6, "Applicant information was found");
|
||||
section.redactLineAfter("Telephone:", "address", 6, "Applicant information was found");
|
||||
section.redactBetween("No:", "Fax", "address", 6, "Applicant information was found");
|
||||
section.redactBetween("Contact:", "Tel.:", "address", 6, "Applicant information was found");
|
||||
end
|
||||
|
||||
rule "8: Redact contact information, if Producer is found"
|
||||
rule "7: Redact contact information, if Producer is found"
|
||||
when
|
||||
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));
|
||||
then
|
||||
section.redactLineAfter("Name:", "address", 8, "Producer was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 8, "Producer was found");
|
||||
section.redactBetween("Contact:", "Phone", "address", 8, "Producer was found");
|
||||
section.redactBetween("Contact:", "Telephone number:", "address", 8, "Producer was found");
|
||||
section.redactBetween("Address:", "Manufacturing", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Telephone:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Phone:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Fax:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("E-mail:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Contact:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Fax number:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Telephone number:", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Tel:", "address", 8, "Producer was found");
|
||||
section.redactBetween("No:", "Fax", "address", 8, "Producer was found");
|
||||
section.redactLineAfter("Name:", "address", 7, "Producer was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 7, "Producer was found");
|
||||
section.redactBetween("Contact:", "Phone", "address", 7, "Producer was found");
|
||||
section.redactBetween("Contact:", "Telephone number:", "address", 7, "Producer was found");
|
||||
section.redactBetween("Address:", "Manufacturing", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Telephone:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Phone:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Fax:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("E-mail:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Contact:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Fax number:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Telephone number:", "address", 7, "Producer was found");
|
||||
section.redactLineAfter("Tel:", "address", 7, "Producer was found");
|
||||
section.redactBetween("No:", "Fax", "address", 7, "Producer was found");
|
||||
end
|
||||
|
||||
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
|
||||
rule "8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
|
||||
when
|
||||
Section(tabularData != null
|
||||
&& tabularData.containsKey("Vertebrate study Y/N")
|
||||
&& tabularData.get("Vertebrate study Y/N").equals("Y")
|
||||
)
|
||||
Section(isVertebrateStudy())
|
||||
then
|
||||
section.redact("name", 9, "Redacted because row is a vertebrate study");
|
||||
section.redact("address", 9, "Redacted because rows is a vertebrate study");
|
||||
section.redact("name", 8, "Redacted because row is a vertebrate study");
|
||||
section.redact("address", 8, "Redacted because row is a vertebrate study");
|
||||
section.highlightCell("Vertebrate study Y/N", 9);
|
||||
end
|
||||
end
|
||||
|
||||
// Rule 9: matches when a Section fact satisfies isNotVertebrateStudy().
// On match it calls section.redactNot(...) for the "name" and "address" types,
// passing rule number 9 and the reason text below.
// NOTE(review): `section` is not bound in this rule's condition; presumably it is
// a global or declared elsewhere in the rule file - confirm.
rule "9: Not redacted because Vertebrate Study = N"
|
||||
when
|
||||
Section(isNotVertebrateStudy())
|
||||
then
|
||||
section.redactNot("name", 9, "Not redacted because row is not a vertebrate study");
|
||||
section.redactNot("address", 9, "Not redacted because row is not a vertebrate study");
|
||||
end
|
||||
|
||||
|
||||
// Rule 10: matches when section.contains("must_redact") evaluates to true.
// On match it calls section.redact(...) for the "name" and "address" types,
// passing rule number 10 and the reason text below.
// NOTE(review): `section` is referenced without a binding in this rule; presumably
// it is a global or declared elsewhere in the rule file - confirm.
rule "10: Redact if must redact entry is found"
|
||||
when
|
||||
eval(section.contains("must_redact")==true);
|
||||
then
|
||||
section.redact("name", 10, "must_redact entry was found.");
|
||||
section.redact("address", 10, "must_redact entry was found.");
|
||||
end
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user