Let Tables know their headlines
This commit is contained in:
parent
135a715e22
commit
88e1c5c58e
@ -40,7 +40,6 @@ public class EntityRedactionService {
|
||||
|
||||
List<Table> tables = paragraph.getTables();
|
||||
|
||||
List<SearchableText> searchableRows = new ArrayList<>();
|
||||
for (Table table : tables) {
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
@ -52,7 +51,23 @@ public class EntityRedactionService {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
searchableRows.add(searchableRow);
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline());
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
.headline(table.getHeadline())
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedRowSection.getEntities()) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
|
||||
} else {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
|
||||
}
|
||||
}
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,26 +88,6 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
documentEntities.addAll(analysedSection.getEntities());
|
||||
|
||||
for (SearchableText searchableRow : searchableRows) {
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, "//TODO TableHeader");
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
.headline("//TODO TableHeader")
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedRowSection.getEntities()) {
|
||||
if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
|
||||
} else {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
|
||||
}
|
||||
}
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
}
|
||||
}
|
||||
|
||||
documentEntities.forEach(entity -> {
|
||||
|
||||
@ -17,7 +17,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
@SuppressWarnings("all")
|
||||
public class SectionsBuilderService {
|
||||
|
||||
|
||||
public void buildSections(Document document) {
|
||||
|
||||
List<AbstractTextContainer> chunkWords = new ArrayList<>();
|
||||
@ -29,7 +28,8 @@ public class SectionsBuilderService {
|
||||
for (Page page : document.getPages()) {
|
||||
for (AbstractTextContainer current : page.getTextBlocks()) {
|
||||
|
||||
if (current.getClassification() == null || current.getClassification().equals("Header") || current.getClassification().equals("Footer")) {
|
||||
if (current.getClassification() == null || current.getClassification()
|
||||
.equals("Header") || current.getClassification().equals("Footer")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ public class SectionsBuilderService {
|
||||
|
||||
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords);
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
lastHeadline = current.getText();
|
||||
chunkBlockList.add(chunkBlock);
|
||||
@ -51,7 +51,7 @@ public class SectionsBuilderService {
|
||||
}
|
||||
}
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords);
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
if (chunkBlock != null) {
|
||||
chunkBlockList.add(chunkBlock);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
@ -61,7 +61,7 @@ public class SectionsBuilderService {
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList) {
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline) {
|
||||
|
||||
Paragraph paragraph = new Paragraph();
|
||||
TextBlock textBlock = null;
|
||||
@ -70,17 +70,23 @@ public class SectionsBuilderService {
|
||||
boolean splitByTable = false;
|
||||
|
||||
Iterator<AbstractTextContainer> itty = wordBlockList.iterator();
|
||||
boolean alreadyAdded= false;
|
||||
boolean alreadyAdded = false;
|
||||
AbstractTextContainer previous = null;
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer container = itty.next();
|
||||
|
||||
if (container instanceof Table) {
|
||||
splitByTable = true;
|
||||
|
||||
if (previous != null && previous instanceof TextBlock && previous.getText().startsWith("Table ")) {
|
||||
((Table) container).setHeadline(previous.getText());
|
||||
} else {
|
||||
((Table) container).setHeadline("Table in: " + lastHeadline);
|
||||
}
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
alreadyAdded =true;
|
||||
alreadyAdded = true;
|
||||
}
|
||||
paragraph.getPageBlocks().add(container);
|
||||
continue;
|
||||
@ -89,24 +95,28 @@ public class SectionsBuilderService {
|
||||
TextBlock wordBlock = (TextBlock) container;
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
|
||||
.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
} else if (splitByTable) {
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
|
||||
.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
alreadyAdded = false;
|
||||
} else if (pageBefore != -1 && wordBlock.getPage() != pageBefore) {
|
||||
textBlock.setPage(pageBefore);
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
|
||||
.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
|
||||
spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
|
||||
.getHeight());
|
||||
}
|
||||
pageBefore = wordBlock.getPage();
|
||||
splitByTable = false;
|
||||
previous = container;
|
||||
}
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
@ -115,5 +125,4 @@ public class SectionsBuilderService {
|
||||
return paragraph;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class Table extends AbstractTextContainer {
|
||||
@ -21,6 +22,10 @@ public class Table extends AbstractTextContainer {
|
||||
|
||||
private RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private String headline;
|
||||
|
||||
@Getter
|
||||
private int rowCount = 0;
|
||||
@Getter
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user