RED-1070: Fixed annotations not being found on unclassified text blocks
This commit is contained in:
parent
53509f072e
commit
de1dea7ac3
@ -20,6 +20,7 @@ public class Document {
|
||||
private List<Paragraph> paragraphs = new ArrayList<>();
|
||||
private List<Header> headers = new ArrayList<>();
|
||||
private List<Footer> footers = new ArrayList<>();
|
||||
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
|
||||
private Map<Integer, List<Entity>> entities = new HashMap<>();
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter= new FloatFrequencyCounter();
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class UnclassifiedText {
|
||||
|
||||
private List<TextBlock> textBlocks;
|
||||
|
||||
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
SearchableText searchableText = new SearchableText();
|
||||
textBlocks.forEach(block -> searchableText.addAll(block.getSequences()));
|
||||
return searchableText;
|
||||
}
|
||||
|
||||
}
|
||||
@ -111,6 +111,8 @@ public class ClassificationService {
|
||||
textBlock.setClassification("TextBlock Italic");
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)) {
|
||||
textBlock.setClassification("TextBlock Unknown");
|
||||
} else {
|
||||
textBlock.setClassification("Other");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
@ -130,6 +131,11 @@ public class EntityRedactionService {
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
|
||||
sectionSearchableTextPairs.add(processText(unclassifiedText.getSearchableText(), unclassifiedText.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
|
||||
sectionNumber.incrementAndGet();
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
|
||||
@ -15,6 +15,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
@ -28,6 +29,7 @@ public class SectionsBuilderService {
|
||||
List<Paragraph> chunkBlockList = new ArrayList<>();
|
||||
List<Header> headers = new ArrayList<>();
|
||||
List<Footer> footers = new ArrayList<>();
|
||||
List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
|
||||
|
||||
AbstractTextContainer prev = null;
|
||||
|
||||
@ -36,6 +38,7 @@ public class SectionsBuilderService {
|
||||
for (Page page : document.getPages()) {
|
||||
List<TextBlock> header = new ArrayList<>();
|
||||
List<TextBlock> footer = new ArrayList<>();
|
||||
List<TextBlock> unclassifiedText = new ArrayList<>();
|
||||
for (AbstractTextContainer current : page.getTextBlocks()) {
|
||||
|
||||
if (current.getClassification() == null) {
|
||||
@ -54,6 +57,11 @@ public class SectionsBuilderService {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current.getClassification().equals("Other")) {
|
||||
unclassifiedText.add((TextBlock) current);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev != null && current.getClassification().startsWith("H ") && !prev.getClassification()
|
||||
.startsWith("H ") || !document.isHeadlines()) {
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
@ -78,6 +86,7 @@ public class SectionsBuilderService {
|
||||
}
|
||||
headers.add(new Header(header));
|
||||
footers.add(new Footer(footer));
|
||||
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
|
||||
}
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
@ -87,6 +96,7 @@ public class SectionsBuilderService {
|
||||
document.setParagraphs(chunkBlockList);
|
||||
document.setHeaders(headers);
|
||||
document.setFooters(footers);
|
||||
document.setUnclassifiedTexts(unclassifiedTexts);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user