diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
index 73c15013..ee1e5771 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
@@ -40,7 +40,6 @@ public class EntityRedactionService {
List
tables = paragraph.getTables();
- List searchableRows = new ArrayList<>();
for (Table table : tables) {
for (List row : table.getRows()) {
SearchableText searchableRow = new SearchableText();
@@ -52,7 +51,23 @@ public class EntityRedactionService {
searchableRow.addAll(textBlock.getSequences());
}
}
- searchableRows.add(searchableRow);
+ Set rowEntities = findEntities(searchableRow, table.getHeadline());
+
+ Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
+ .entities(rowEntities)
+ .text(searchableRow.getAsStringWithLinebreaks())
+ .searchText(searchableRow.toString())
+ .headline(table.getHeadline())
+ .build());
+
+ for (Entity entity : analysedRowSection.getEntities()) {
+ if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
+ entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
+ } else {
+ entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
+ }
+ }
+ documentEntities.addAll(analysedRowSection.getEntities());
}
}
@@ -73,26 +88,6 @@ public class EntityRedactionService {
}
documentEntities.addAll(analysedSection.getEntities());
-
- for (SearchableText searchableRow : searchableRows) {
- Set rowEntities = findEntities(searchableRow, "//TODO TableHeader");
-
- Section analysedRowSection = droolsExecutionService.executeRules(Section.builder()
- .entities(rowEntities)
- .text(searchableRow.getAsStringWithLinebreaks())
- .searchText(searchableRow.toString())
- .headline("//TODO TableHeader")
- .build());
-
- for (Entity entity : analysedRowSection.getEntities()) {
- if (dictionaryService.getCaseInsensitiveTypes().contains(entity.getType())) {
- entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), true));
- } else {
- entity.setPositionSequences(searchableRow.getSequences(entity.getWord(), false));
- }
- }
- documentEntities.addAll(analysedRowSection.getEntities());
- }
}
documentEntities.forEach(entity -> {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java
index 8c64ce56..04a45e91 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java
@@ -17,7 +17,6 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@SuppressWarnings("all")
public class SectionsBuilderService {
-
public void buildSections(Document document) {
List chunkWords = new ArrayList<>();
@@ -29,7 +28,8 @@ public class SectionsBuilderService {
for (Page page : document.getPages()) {
for (AbstractTextContainer current : page.getTextBlocks()) {
- if (current.getClassification() == null || current.getClassification().equals("Header") || current.getClassification().equals("Footer")) {
+ if (current.getClassification() == null || current.getClassification()
+ .equals("Header") || current.getClassification().equals("Footer")) {
continue;
}
@@ -37,7 +37,7 @@ public class SectionsBuilderService {
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
- Paragraph chunkBlock = buildTextBlock(chunkWords);
+ Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
chunkBlock.setHeadline(lastHeadline);
lastHeadline = current.getText();
chunkBlockList.add(chunkBlock);
@@ -51,7 +51,7 @@ public class SectionsBuilderService {
}
}
- Paragraph chunkBlock = buildTextBlock(chunkWords);
+ Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
if (chunkBlock != null) {
chunkBlockList.add(chunkBlock);
chunkBlock.setHeadline(lastHeadline);
@@ -61,7 +61,7 @@ public class SectionsBuilderService {
}
- private Paragraph buildTextBlock(List wordBlockList) {
+ private Paragraph buildTextBlock(List wordBlockList, String lastHeadline) {
Paragraph paragraph = new Paragraph();
TextBlock textBlock = null;
@@ -70,17 +70,23 @@ public class SectionsBuilderService {
boolean splitByTable = false;
Iterator itty = wordBlockList.iterator();
- boolean alreadyAdded= false;
+ boolean alreadyAdded = false;
+ AbstractTextContainer previous = null;
while (itty.hasNext()) {
AbstractTextContainer container = itty.next();
if (container instanceof Table) {
splitByTable = true;
+ if (previous instanceof TextBlock && previous.getText().startsWith("Table ")) { // instanceof is already false for null, so no separate null check
+ ((Table) container).setHeadline(previous.getText()); // most recent non-table container starts with "Table ": use its text as the table headline
+ } else {
+ ((Table) container).setHeadline("Table in: " + lastHeadline); // otherwise fall back to the headline passed in by the caller
+ }
if (textBlock != null && !alreadyAdded) {
paragraph.getPageBlocks().add(textBlock);
- alreadyAdded =true;
+ alreadyAdded = true;
}
paragraph.getPageBlocks().add(container);
continue;
@@ -89,24 +95,28 @@ public class SectionsBuilderService {
TextBlock wordBlock = (TextBlock) container;
if (textBlock == null) {
- textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
+ textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
+ .getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
} else if (splitByTable) {
- textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
+ textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
+ .getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
alreadyAdded = false;
} else if (pageBefore != -1 && wordBlock.getPage() != pageBefore) {
textBlock.setPage(pageBefore);
paragraph.getPageBlocks().add(textBlock);
- textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
+ textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock
+ .getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
} else {
TextBlock spatialEntity = textBlock.union(wordBlock);
- textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
- spatialEntity.getWidth(), spatialEntity.getHeight());
+ textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
+ .getHeight());
}
pageBefore = wordBlock.getPage();
splitByTable = false;
+ previous = container;
}
if (textBlock != null && !alreadyAdded) {
@@ -115,5 +125,4 @@ public class SectionsBuilderService {
return paragraph;
}
-
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
index f45c2d7c..20f1fbd8 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
@@ -13,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.Getter;
+import lombok.Setter;
@SuppressWarnings("all")
public class Table extends AbstractTextContainer {
@@ -21,6 +22,10 @@ public class Table extends AbstractTextContainer {
private RectangleSpatialIndex| si = new RectangleSpatialIndex<>();
+ @Getter
+ @Setter
+ private String headline;
+
@Getter
private int rowCount = 0;
@Getter