RED-1070: Fixed annotations not being found on unclassified text blocks

This commit is contained in:
Dominique Eifländer 2021-02-18 08:58:51 +01:00
parent 53509f072e
commit de1dea7ac3
5 changed files with 43 additions and 0 deletions

View File

@ -20,6 +20,7 @@ public class Document {
private List<Paragraph> paragraphs = new ArrayList<>();
private List<Header> headers = new ArrayList<>();
private List<Footer> footers = new ArrayList<>();
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
private Map<Integer, List<Entity>> entities = new HashMap<>();
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter= new FloatFrequencyCounter();

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Container for a list of {@link TextBlock}s, with a helper that exposes
 * their combined content as a {@link SearchableText}.
 */
@Data
@AllArgsConstructor
public class UnclassifiedText {

    /** The text blocks held by this container. */
    private List<TextBlock> textBlocks;

    /**
     * Builds a fresh {@link SearchableText} on every call and adds the
     * sequences of each text block to it, in list order.
     *
     * @return a new {@link SearchableText} containing the sequences of all text blocks
     */
    public SearchableText getSearchableText() {
        SearchableText combined = new SearchableText();
        for (TextBlock textBlock : textBlocks) {
            combined.addAll(textBlock.getSequences());
        }
        return combined;
    }
}

View File

@ -111,6 +111,8 @@ public class ClassificationService {
textBlock.setClassification("TextBlock Italic");
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
textBlock.setClassification("TextBlock Unknown");
} else {
textBlock.setClassification("Other");
}
}

View File

@ -23,6 +23,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
@ -130,6 +131,11 @@ public class EntityRedactionService {
sectionNumber.incrementAndGet();
}
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
sectionSearchableTextPairs.add(processText(unclassifiedText.getSearchableText(), unclassifiedText.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionNumber.incrementAndGet();
}
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
documentEntities.addAll(analysedRowSection.getEntities());

View File

@ -15,6 +15,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Header;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@ -28,6 +29,7 @@ public class SectionsBuilderService {
List<Paragraph> chunkBlockList = new ArrayList<>();
List<Header> headers = new ArrayList<>();
List<Footer> footers = new ArrayList<>();
List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
AbstractTextContainer prev = null;
@ -36,6 +38,7 @@ public class SectionsBuilderService {
for (Page page : document.getPages()) {
List<TextBlock> header = new ArrayList<>();
List<TextBlock> footer = new ArrayList<>();
List<TextBlock> unclassifiedText = new ArrayList<>();
for (AbstractTextContainer current : page.getTextBlocks()) {
if (current.getClassification() == null) {
@ -54,6 +57,11 @@ public class SectionsBuilderService {
continue;
}
if (current.getClassification().equals("Other")) {
unclassifiedText.add((TextBlock) current);
continue;
}
if (prev != null && current.getClassification().startsWith("H ") && !prev.getClassification()
.startsWith("H ") || !document.isHeadlines()) {
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
@ -78,6 +86,7 @@ public class SectionsBuilderService {
}
headers.add(new Header(header));
footers.add(new Footer(footer));
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
}
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
@ -87,6 +96,7 @@ public class SectionsBuilderService {
document.setParagraphs(chunkBlockList);
document.setHeaders(headers);
document.setFooters(footers);
document.setUnclassifiedTexts(unclassifiedTexts);
}