Pull request #135: RED-1260: Enabled to add rules and manual redaction actions for images

Merge in RED/redaction-service from RED-1260 to master

* commit '55ba351362785de41090fb4252c9ee7c4c486991':
  RED-1260: Enabled to add rules and manual redaction actions for images
This commit is contained in:
Dominique Eiflaender 2021-04-15 13:08:24 +02:00
commit a7aa3a723a
19 changed files with 467 additions and 108 deletions

View File

@ -4,11 +4,13 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.model.SectionText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -24,8 +26,8 @@ public class Document {
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
private Map<Integer, List<Entity>> entities = new HashMap<>();
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter= new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter= new StringFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
private boolean headlines;
@ -35,4 +37,7 @@ public class Document {
private long rulesVersion;
private List<SectionText> sectionText = new ArrayList<>();
private Map<Integer, Set<Image>> images = new HashMap<>();
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@ -12,9 +13,10 @@ import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
public class Paragraph {
public class Paragraph implements Comparable{
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
private List<PdfImage> images = new ArrayList<>();
private String headline;
@ -53,4 +55,11 @@ public class Paragraph {
return textBlocks;
}
/**
 * Compares this paragraph with another object for ordering.
 * <p>
 * NOTE(review): this implementation always returns 0, i.e. every paragraph compares as equal
 * to any other object. Sorted collections that rely on this ordering (for example a
 * {@code TreeSet}) will therefore treat all paragraphs as duplicates and keep only the first
 * one that was added. Confirm whether a real ordering is intended here.
 *
 * @param o the object to compare with (ignored)
 * @return always 0
 */
@Override
public int compareTo(Object o) {
return 0;
}
}

View File

@ -67,8 +67,8 @@ public class RedactionController implements RedactionResource {
log.info("Document structure analysis successful, starting redaction analysis...");
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
imageClassificationService.classifyImages(classifiedDoc);
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
.getRuleSetId());

View File

@ -223,7 +223,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
.getWidth(), (float) imageBounds.getHeight());
if (rect.getHeight() > 2 && rect.getWidth() > 2) {
this.images.add(new PdfImage(pdfImage.getImage(), rect));
this.images.add(new PdfImage(pdfImage.getImage(), rect, pageNumber));
}
}
} catch (Exception e) {

View File

@ -0,0 +1,26 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.awt.geom.Rectangle2D;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Rule-facing representation of an image found in a document, carrying its location and the
 * redaction decision made for it.
 * <p>
 * NOTE(review): Lombok's {@code @Data} derives {@code equals}/{@code hashCode} from all fields,
 * all of which are mutable through the generated setters. Changing a field of an instance that
 * is already stored in a hash-based collection changes its hash code - verify that callers only
 * iterate such collections and never look up or remove mutated instances.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Image {
// Type label of the image as a plain string.
private String type;
// Bounding rectangle of the image.
private Rectangle2D position;
// Whether the image is to be redacted.
private boolean redaction;
// Textual reason for the redaction decision.
private String redactionReason;
// Legal basis recorded for the redaction, if any.
private String legalBasis;
// Number of the rule that made the redaction decision.
private int matchedRule;
// Number of the section the image has been assigned to.
private int sectionNumber;
// Section label of the image - presumably the section headline; verify against callers.
private String section;
// Number of the page the image is located on.
private int page;
}

View File

@ -20,5 +20,9 @@ public class PdfImage {
@NonNull
private Rectangle2D position;
private ImageType imageType;
private boolean isAppendedToParagraph;
@NonNull
private int page;
}

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
@ -18,6 +20,7 @@ public class ReanalysisSection {
private List<TextBlock> textBlocks;
private Map<String, CellValue> tabularData = new HashMap<>();
private List<Integer> cellStarts;
private Set<Image> images = new HashSet<>();
public SearchableText getSearchableText() {

View File

@ -51,6 +51,9 @@ public class Section {
private SearchableText searchableText;
@Builder.Default
private Set<Image> images = new HashSet<>();
public boolean rowEquals(String headerName, String value) {
@ -75,6 +78,12 @@ public class Section {
}
/**
 * Tells whether this section holds at least one image of the given type.
 *
 * @param type the image type to look for
 * @return {@code true} if the type of any image of this section equals {@code type}
 */
public boolean matchesImageType(String type) {

    for (Image candidate : images) {
        if (candidate.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
public boolean headlineContainsWord(String word) {
return StringUtils.containsIgnoreCase(headline, word);
@ -109,6 +118,19 @@ public class Section {
}
/**
 * Marks every image of the given type in this section as redacted.
 *
 * @param type       the image type to redact
 * @param ruleNumber number of the rule that triggered the redaction
 * @param reason     reason stored on each matching image
 * @param legalBasis legal basis stored on each matching image
 */
public void redactImage(String type, int ruleNumber, String reason, String legalBasis) {

    for (Image candidate : images) {
        if (!candidate.getType().equals(type)) {
            // Images of other types are left untouched.
            continue;
        }
        candidate.setRedaction(true);
        candidate.setMatchedRule(ruleNumber);
        candidate.setRedactionReason(reason);
        candidate.setLegalBasis(legalBasis);
    }
}
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
@ -125,6 +147,18 @@ public class Section {
}
/**
 * Marks every image of the given type in this section as not redacted.
 * The legal basis of the matching images is not modified.
 *
 * @param type       the image type to exclude from redaction
 * @param ruleNumber number of the rule that triggered the decision
 * @param reason     reason stored on each matching image
 */
public void redactNotImage(String type, int ruleNumber, String reason) {

    for (Image candidate : images) {
        if (!candidate.getType().equals(type)) {
            // Images of other types are left untouched.
            continue;
        }
        candidate.setRedaction(false);
        candidate.setMatchedRule(ruleNumber);
        candidate.setRedactionReason(reason);
    }
}
public void redactNot(String type, int ruleNumber, String reason) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
@ -140,7 +174,8 @@ public class Section {
}
public void expandToHintAnnotationByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group, String asType) {
public void expandToHintAnnotationByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group,
String asType) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);

View File

@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@ -19,22 +20,25 @@ import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.model.SectionText;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@ -89,7 +93,8 @@ public class EntityRedactionService {
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity.getStart(), entity.getEnd()));
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd()));
}
}
@ -120,22 +125,24 @@ public class EntityRedactionService {
sectionNumber.incrementAndGet();
}
sectionSearchableTextPairs.add(processText(classifiedDoc, paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph
.getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
.getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, paragraph
.getImages()));
sectionNumber.incrementAndGet();
}
for (Header header : classifiedDoc.getHeaders()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
for (Footer footer : classifiedDoc.getFooters()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText
.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
@ -143,6 +150,10 @@ public class EntityRedactionService {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
documentEntities.addAll(analysedRowSection.getEntities());
for (Image image : analysedRowSection.getImages()) {
classifiedDoc.getImages().computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
@ -172,7 +183,8 @@ public class EntityRedactionService {
}
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table, ManualRedactions manualRedactions,
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table,
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
@ -192,7 +204,11 @@ public class EntityRedactionService {
}
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks().get(0).getSequences().get(0).getPage());
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
.get(0)
.getSequences()
.get(0)
.getPage());
sectionText.getSectionAreas().add(sectionArea);
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber.intValue());
@ -237,7 +253,7 @@ public class EntityRedactionService {
.dictionary(dictionary)
.build(), searchableRow));
if(!local) {
if (!local) {
sectionText.setText(searchableRow.toString());
sectionText.setHeadline(table.getHeadline());
sectionText.setSectionNumber(sectionNumber.intValue());
@ -252,7 +268,8 @@ public class EntityRedactionService {
}
private List<SectionSearchableTextPair> processTableAsOneText(Document classifiedDoc, Table table, ManualRedactions manualRedactions,
private List<SectionSearchableTextPair> processTableAsOneText(Document classifiedDoc, Table table,
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
@ -266,9 +283,13 @@ public class EntityRedactionService {
continue;
}
if(!local) {
if (!local) {
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks().get(0).getSequences().get(0).getPage());
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
.get(0)
.getSequences()
.get(0)
.getPage());
sectionText.getSectionAreas().add(sectionArea);
}
@ -279,7 +300,6 @@ public class EntityRedactionService {
}
}
Set<Entity> rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary);
@ -297,7 +317,7 @@ public class EntityRedactionService {
.dictionary(dictionary)
.build(), entireTableText));
if(!local) {
if (!local) {
sectionText.setText(entireTableText.toString());
sectionText.setHeadline(table.getHeadline());
sectionText.setSectionNumber(sectionNumber.intValue());
@ -309,12 +329,14 @@ public class EntityRedactionService {
}
private SectionSearchableTextPair processText(Document classifiedDoc, SearchableText searchableText, List<TextBlock> paragraphTextBlocks,
String headline, ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
private SectionSearchableTextPair processText(Document classifiedDoc, SearchableText searchableText,
List<TextBlock> paragraphTextBlocks, String headline,
ManualRedactions manualRedactions, AtomicInteger sectionNumber,
Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<PdfImage> images) {
if(!local) {
if (!local) {
SectionText sectionText = new SectionText();
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
@ -345,12 +367,15 @@ public class EntityRedactionService {
.sectionNumber(sectionNumber.intValue())
.searchableText(searchableText)
.dictionary(dictionary)
.images(images.stream()
.map(image -> convert(image, sectionNumber.intValue(), headline))
.collect(Collectors.toSet()))
.build(), searchableText);
}
public Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local) {
Dictionary dictionary, boolean local) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
@ -390,4 +415,18 @@ public class EntityRedactionService {
}
}
/**
 * Builds the rule-facing {@link Image} for an image extracted from the PDF.
 * <p>
 * The type label is {@code "image"} for {@code ImageType.OTHER}; for every other image type it
 * is the lower-cased name of the type constant.
 *
 * @param pdfImage      the extracted image; its image type must be set
 * @param sectionNumber number of the section the image is assigned to
 * @param headline      headline of that section, stored as the image's section
 * @return the converted image
 */
private Image convert(PdfImage pdfImage, int sectionNumber, String headline) {

    String typeLabel;
    if (pdfImage.getImageType().equals(ImageType.OTHER)) {
        typeLabel = "image";
    } else {
        typeLabel = pdfImage.getImageType().name().toLowerCase(Locale.ROOT);
    }

    return Image.builder()
            .type(typeLabel)
            .position(pdfImage.getPosition())
            .sectionNumber(sectionNumber)
            .section(headline)
            .page(pdfImage.getPage())
            .build();
}
}

View File

@ -32,7 +32,7 @@ public class ImageClassificationService {
classifiedDoc.getPages().forEach(page -> {
page.getImages().forEach(image -> {
if(settings.isEnableImageClassification()) {
if (settings.isEnableImageClassification()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
ImageIO.write(image.getImage(), "png", baos);
ImageClassificationResponse response = imageClassificationClient.classify(new MockMultipartFile("file", "Image.png", "image/png", baos

View File

@ -39,6 +39,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
@ -75,10 +76,14 @@ public class ReanalyzeService {
}
Set<Integer> sectionsToReanaylse = new HashSet<>();
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
for (RedactionLogEntry entry : renalyzeRequest.getRedactionLog().getRedactionLogEntry()) {
if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
sectionsToReanaylse.add(entry.getSectionNumber());
}
if (entry.isImage() || entry.getType().equals("image")) {
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
}
}
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
@ -173,10 +178,15 @@ public class ReanalyzeService {
}
reanalysisSection.setTextBlocks(textBlocks);
reanalysisSection.setTabularData(tabularData);
reanalysisSections.add(reanalysisSection);
if (sectionText.isTable()) {
reanalysisSection.setCellStarts(cellStarts);
}
if (imageEntries.containsKey(sectionText.getSectionNumber())) {
reanalysisSection.getImages().addAll(imageEntries.get(sectionText.getSectionNumber()));
}
reanalysisSections.add(reanalysisSection);
}
//--
@ -208,14 +218,22 @@ public class ReanalyzeService {
.tabularData(reanalysisSection.getTabularData())
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.build(), reanalysisSection.getSearchableText()));
}
Set<Entity> entities = new HashSet<>();
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair
.getSection());
entities.addAll(analysedRowSection.getEntities());
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
for (Image image : analysedRowSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
});
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
@ -241,6 +259,12 @@ public class ReanalyzeService {
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest
.getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
}
if (imagesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, renalyzeRequest
.getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
}
newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, renalyzeRequest
.getRuleSetId()));
}
@ -248,12 +272,13 @@ public class ReanalyzeService {
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
while (itty.hasNext()) {
RedactionLogEntry entry = itty.next();
if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()) {
if (sectionsToReanaylse.contains(entry.getSectionNumber())) {
itty.remove();
}
}
renalyzeRequest.getRedactionLog().getRedactionLogEntry().addAll(newRedactionLogEntries);
renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
@ -277,4 +302,19 @@ public class ReanalyzeService {
.collect(Collectors.toSet());
}
/**
 * Rebuilds an {@link Image} from a redaction log entry.
 * <p>
 * Only the first position of the entry is used; it supplies both the bounding rectangle and
 * the page of the image.
 *
 * @param entry the redaction log entry; must have at least one position
 * @return the image described by the entry
 */
public Image convert(RedactionLogEntry entry) {

    Rectangle firstPosition = entry.getPositions().get(0);
    Rectangle2D.Float bounds = new Rectangle2D.Float(
            firstPosition.getTopLeft().getX(),
            firstPosition.getTopLeft().getY(),
            firstPosition.getWidth(),
            firstPosition.getHeight());

    return Image.builder()
            .type(entry.getType())
            .position(bounds)
            .sectionNumber(entry.getSectionNumber())
            .section(entry.getSection())
            .page(firstPosition.getPage())
            .build();
}
}

View File

@ -3,7 +3,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -30,8 +29,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
@ -65,47 +63,101 @@ public class RedactionLogCreatorService {
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
}
if (!classifiedDoc.getPages().get(page - 1).getImages().isEmpty()) {
addImageEntries(classifiedDoc, page, ruleSetId);
if (classifiedDoc.getImages().get(page) != null && !classifiedDoc.getImages().get(page).isEmpty()) {
classifiedDoc.getRedactionLogEntities()
.addAll(addImageEntries(classifiedDoc.getImages(), manualRedactions, page, ruleSetId));
}
}
}
private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) {
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, ManualRedactions manualRedactions,
int pageNumber, String ruleSetId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
for (Image image : images.get(pageNumber)) {
String id = IdBuilder.buildId(image.getPosition(), pageNumber);
for (PdfImage image : classifiedDoc.getPages().get(pageNumber - 1).getImages()) {
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
.id(IdBuilder.buildId(image.getPosition(), pageNumber))
.color(getColor(image.getImageType().name().toLowerCase(Locale.ROOT), ruleSetId))
.id(id)
.color(getColorForImage(image, ruleSetId, false))
.isImage(true)
.type(image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().name().toLowerCase(Locale.ROOT))
.redacted(isImageRedactionType(image.getImageType()))
.isHint(!isImageRedactionType(image.getImageType()))
.type(image.getType())
.redacted(image.isRedaction())
.reason(image.getRedactionReason())
.legalBasis(image.getLegalBasis())
.matchedRule(image.getMatchedRule())
.isHint(dictionaryService.isHint(image.getType(), ruleSetId))
.manual(false)
.isDictionaryEntry(false)
.isRecommendation(false)
.positions(List.of(new Rectangle(new Point((float) image.getPosition()
.getX(), (float) image.getPosition().getY()), (float) image.getPosition()
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
.sectionNumber(image.getSectionNumber())
.section(image.getSection())
.build();
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
}
}
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(id)) {
String manualOverrideReason = null;
if (manualRemoval.getStatus().equals(Status.APPROVED)) {
image.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setStatus(Status.APPROVED);
manualOverrideReason = image.getRedactionReason() + ", removed by manual override";
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to remove";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
private boolean isImageRedactionType(ImageType imageType) {
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
}
}
}
if (imageType.equals(ImageType.LOGO)) {
return true;
if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) {
for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
if (manualForceRedact.getId().equals(id)) {
String manualOverrideReason = null;
if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
image.setRedaction(true);
redactionLogEntry.setRedacted(true);
redactionLogEntry.setStatus(Status.APPROVED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
manualOverrideReason = image.getRedactionReason() + ", forced by manual override";
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to force redact";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
}
}
}
redactionLogEntities.add(redactionLogEntry);
}
if (imageType.equals(ImageType.FORMULA)) {
return true;
}
if (imageType.equals(ImageType.SIGNATURE)) {
return true;
}
return false;
return redactionLogEntities;
}
@ -372,6 +424,18 @@ public class RedactionLogCreatorService {
}
/**
 * Selects the color used for an image entry.
 *
 * @param image             the image to color
 * @param ruleSetId         rule set whose colors are looked up
 * @param requestedToRemove whether the "request remove" color should be used
 * @return the "request remove" color if {@code requestedToRemove} is set; otherwise the color of
 *         the image's type if the image is redacted or its type is a hint; otherwise the
 *         "not redacted" color
 */
private float[] getColorForImage(Image image, String ruleSetId, boolean requestedToRemove) {

    if (requestedToRemove) {
        return dictionaryService.getRequestRemoveColor(ruleSetId);
    }

    // The hint lookup is only performed when the image is not redacted.
    boolean usesTypeColor = image.isRedaction() || dictionaryService.isHint(image.getType(), ruleSetId);
    if (usesTypeColor) {
        return dictionaryService.getColor(image.getType(), ruleSetId);
    }
    return dictionaryService.getNotRedactedColor(ruleSetId);
}
private boolean isHint(Entity entity, String ruleSetId) {
return dictionaryService.isHint(entity.getType(), ruleSetId);

View File

@ -71,6 +71,7 @@ public class PdfSegmentationService {
page.setPageNumber(pageNumber);
increaseDocumentStatistics(page, document);
page.setImages(stripper.getImages());
pages.add(page);
}
@ -78,8 +79,8 @@ public class PdfSegmentationService {
document.setPages(pages);
classificationService.classifyDocument(document);
sectionsBuilderService.buildSections(document);
sectionsBuilderService.addImagesToSections(document);
return document;
}

View File

@ -2,8 +2,12 @@ package com.iqser.red.service.redaction.v1.server.segmentation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
@ -16,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@ -85,13 +90,13 @@ public class SectionsBuilderService {
prev = current;
}
if(!header.isEmpty()) {
if (!header.isEmpty()) {
headers.add(new Header(header));
}
if(!footer.isEmpty()) {
if (!footer.isEmpty()) {
footers.add(new Footer(footer));
}
if(!unclassifiedText.isEmpty()) {
if (!unclassifiedText.isEmpty()) {
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
}
}
@ -107,6 +112,53 @@ public class SectionsBuilderService {
}
/**
 * Attaches every image of the document to exactly one paragraph, so that the image is analysed
 * together with that paragraph's section.
 * <p>
 * For each page, the paragraphs that have at least one page block on that page are considered
 * in document order. An image is attached to the first paragraph whose vertical range - from
 * the end of the previous paragraph up to the largest {@code maxY} of its own blocks on that
 * page - contains the image's Y position. If no range matches, the image is attached to the
 * first candidate paragraph. If a page has no paragraphs, the paragraphs of the closest
 * preceding page are used as candidates.
 * <p>
 * Images on pages for which no candidate paragraph exists at all are left unattached.
 *
 * @param document the classified document whose paragraphs receive the images
 */
public void addImagesToSections(Document document) {

    // Paragraphs per page in document order. A plain list is used on purpose: a sorted set
    // would depend on Paragraph's ordering, and paragraphs that compare as equal would be
    // collapsed into a single entry per page.
    Map<Integer, List<Paragraph>> paragraphsByPage = new HashMap<>();
    for (Paragraph paragraph : document.getParagraphs()) {
        for (AbstractTextContainer container : paragraph.getPageBlocks()) {
            List<Paragraph> onPage = paragraphsByPage.computeIfAbsent(container.getPage(), x -> new ArrayList<>());
            // Identity check: a paragraph with several blocks on one page is listed only once.
            if (onPage.isEmpty() || onPage.get(onPage.size() - 1) != paragraph) {
                onPage.add(paragraph);
            }
        }
    }

    for (Page page : document.getPages()) {
        int pageNumber = page.getPageNumber();

        // Walk backwards to the closest page that has paragraphs. The search is bounded so it
        // terminates when neither this page nor any earlier page has one.
        List<Paragraph> candidates = null;
        for (int i = pageNumber; i >= 0 && candidates == null; i--) {
            candidates = paragraphsByPage.get(i);
        }
        if (candidates == null) {
            // No paragraph available to attach the images of this page to.
            continue;
        }

        for (PdfImage image : page.getImages()) {
            double imageY = image.getPosition().getY();
            float previousEnd = 0f;

            for (Paragraph paragraph : candidates) {
                // Largest maxY of the paragraph's blocks on the current page.
                float currentEnd = 0f;
                for (AbstractTextContainer block : paragraph.getPageBlocks()) {
                    if (block.getPage() != pageNumber) {
                        continue;
                    }
                    if (block.getMaxY() > currentEnd) {
                        currentEnd = block.getMaxY();
                    }
                }

                if (imageY >= previousEnd && imageY <= currentEnd) {
                    paragraph.getImages().add(image);
                    image.setAppendedToParagraph(true);
                    // Stop here so an image on a range boundary is not attached twice.
                    break;
                }
                previousEnd = currentEnd;
            }

            if (!image.isAppendedToParagraph()) {
                candidates.get(0).getImages().add(image);
                image.setAppendedToParagraph(true);
            }
        }
    }
}
private void mergeTableMetadata(Table currentTable, Table previousTable) {
// Distribute header information for subsequent tables

View File

@ -4,12 +4,8 @@ import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
@ -21,7 +17,6 @@ import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -29,15 +24,7 @@ import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import org.apache.pdfbox.util.Matrix;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
@ -45,7 +32,6 @@ import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.mockito.MockitoAnnotations;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
@ -77,23 +63,14 @@ import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.model.SectionText;
import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = RANDOM_PORT)
@ -112,6 +89,10 @@ public class RedactionIntegrationTest {
private static final String TEST_METHOD = "test_method";
private static final String PURITY = "purity";
private static final String IMAGE = "image";
private static final String LOGO = "logo";
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author";
private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address";
@ -196,6 +177,10 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PURITY));
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(IMAGE));
when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(OCR));
when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(LOGO));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SIGNATURE));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FORMULA));
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
}
@ -278,7 +263,27 @@ public class RedactionIntegrationTest {
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/image.txt")
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(OCR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(LOGO, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(SIGNATURE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(FORMULA, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/empty.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
@ -309,6 +314,10 @@ public class RedactionIntegrationTest {
typeColorMap.put(FALSE_POSITIVE, "#ffffff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
typeColorMap.put(LOGO, "#ffe187");
typeColorMap.put(FORMULA, "#ffe187");
typeColorMap.put(SIGNATURE, "#ffe187");
hintTypeMap.put(VERTEBRATE, true);
hintTypeMap.put(ADDRESS, false);
@ -326,6 +335,10 @@ public class RedactionIntegrationTest {
hintTypeMap.put(FALSE_POSITIVE, true);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
@ -343,6 +356,10 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(FALSE_POSITIVE, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
@ -360,6 +377,10 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(FALSE_POSITIVE, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
@ -377,6 +398,10 @@ public class RedactionIntegrationTest {
rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
@ -563,7 +588,6 @@ public class RedactionIntegrationTest {
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
.redactionLog(result.getRedactionLog())
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.manualRedactions(null)
.text(result.getText())
.ruleSetId(TEST_RULESET_ID)
.build());

View File

@ -2,12 +2,17 @@ package com.iqser.red.service.redaction.v1.server.segmentation;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.runtime.KieContainer;
@ -18,7 +23,10 @@ import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
@ -45,24 +53,25 @@ public class PdfSegmentationServiceTest {
@Test
public void testPDFSegmentationWithComplexTable() throws IOException {
@Ignore
public void testExtractImages() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document document = pdfSegmentationService.parseDocument(pdDocument);
assertThat(document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
.collect(Collectors.toList())).isNotEmpty();
Table table = document.getParagraphs()
.stream()
.flatMap(paragraph -> paragraph.getTables().stream())
.collect(Collectors.toList())
.get(0);
assertThat(table.getColCount()).isEqualTo(6);
assertThat(table.getRowCount()).isEqualTo(13);
assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
int i = 0;
for (Page page : document.getPages()) {
for (PdfImage image : page.getImages()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
ImageIO.write(image.getImage(), "png", baos);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Image " + i + ".png")) {
fileOutputStream.write(baos.toByteArray());
}
}
i++;
}
}
}
}
@ -97,12 +106,12 @@ public class PdfSegmentationServiceTest {
.stream()
.map(Collections::singletonList)
.collect(Collectors.toList());
assertThat(secondTable.getRows().stream()
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.collect(Collectors.toList())
.equals(firstTableHeaderCells)))
.isTrue();
.equals(firstTableHeaderCells))).isTrue();
}
}
@ -137,12 +146,12 @@ public class PdfSegmentationServiceTest {
.stream()
.map(Cell::getHeaderCells)
.collect(Collectors.toList());
assertThat(secondTable.getRows().stream()
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.collect(Collectors.toList())
.equals(firstTableHeaderCells)))
.isTrue();
.equals(firstTableHeaderCells))).isTrue();
}
}
@ -177,12 +186,12 @@ public class PdfSegmentationServiceTest {
.stream()
.map(Collections::singletonList)
.collect(Collectors.toList());
assertThat(secondTable.getRows().stream()
assertThat(secondTable.getRows()
.stream()
.allMatch(row -> row.stream()
.map(Cell::getHeaderCells)
.collect(Collectors.toList())
.equals(firstTableHeaderCells)))
.isTrue();
.equals(firstTableHeaderCells))).isTrue();
}
}

View File

@ -258,4 +258,28 @@ rule "22: Redact Must Redact"
Section(matchesType("must_redact"))
then
section.redact("must_redact", 22, "Must Redact found", "Article 39(1)(2) of Regulation (EC) No 178/2002");
end
// Rule 23: applies to any Section for which matchesImageType("signature") holds
// and calls redactImage on it for the "signature" type.
// NOTE(review): the redactImage arguments presumably mirror section.redact(...)
// (type, rule number, reason, legal basis) - confirm against the Section class.
rule "23: Redact signatures"
when
Section(matchesImageType("signature"))
then
section.redactImage("signature", 23, "Signature found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
// Rule 24: applies to any Section for which matchesImageType("formula") holds
// and calls redactImage on it for the "formula" type.
// NOTE(review): the redactImage arguments presumably mirror section.redact(...)
// (type, rule number, reason, legal basis) - confirm against the Section class.
rule "24: Redact formula"
when
Section(matchesImageType("formula"))
then
section.redactImage("formula", 24, "Formula found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
// Rule 25: applies to any Section for which matchesImageType("logo") holds
// and calls redactImage on it for the "logo" type.
// NOTE(review): the redactImage arguments presumably mirror section.redact(...)
// (type, rule number, reason, legal basis) - confirm against the Section class.
rule "25: Redact Logos"
when
Section(matchesImageType("logo"))
then
section.redactImage("logo", 25, "Logo found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end

View File

@ -304,4 +304,28 @@ rule "25: Redact Purity"
Section(searchText.contains("purity"))
then
section.redactByRegEx("purity ?:? (([\\d\\.]+)( .{0,4}\\.)? ?%)", true, 1, "purity", 17, "Purity found", "Reg (EC) No 1107/2009 Art. 63 (2a)");
end
// Rule 26: applies to any Section for which matchesImageType("signature") holds
// and calls redactImage on it for the "signature" type.
// NOTE(review): the redactImage arguments presumably mirror the other section.redact*
// calls (type, rule number, reason, legal basis) - confirm against the Section class.
rule "26: Redact signatures"
when
Section(matchesImageType("signature"))
then
section.redactImage("signature", 26, "Signature found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
// Rule 27: applies to any Section for which matchesImageType("formula") holds
// and calls redactImage on it for the "formula" type.
// NOTE(review): the redactImage arguments presumably mirror the other section.redact*
// calls (type, rule number, reason, legal basis) - confirm against the Section class.
rule "27: Redact formula"
when
Section(matchesImageType("formula"))
then
section.redactImage("formula", 27, "Formula found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end
// Rule 28: applies to any Section for which matchesImageType("logo") holds
// and calls redactImage on it for the "logo" type.
// NOTE(review): the redactImage arguments presumably mirror the other section.redact*
// calls (type, rule number, reason, legal basis) - confirm against the Section class.
rule "28: Redact Logos"
when
Section(matchesImageType("logo"))
then
section.redactImage("logo", 28, "Logo found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
end