Normalize header information
This commit is contained in:
parent
a6415363cd
commit
c93ca745fc
@ -13,6 +13,7 @@ import java.util.stream.Collectors;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
@ -105,7 +106,9 @@ public class Table extends AbstractTextContainer {
|
||||
verticalHeader = true;
|
||||
return firstColCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return cell.getTextBlocks().get(0).getText();
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
@ -114,7 +117,9 @@ public class Table extends AbstractTextContainer {
|
||||
log.info("Headers are defaulted in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (cell != null && CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return cell.getTextBlocks().get(0).getText();
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
@ -124,7 +129,9 @@ public class Table extends AbstractTextContainer {
|
||||
log.info("Headers are in first row.");
|
||||
return rowCells.stream().map(cell -> {
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks())) {
|
||||
return cell.getTextBlocks().get(0).getText();
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(cell.getTextBlocks().get(0).getText())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" ", " ");
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -19,6 +19,7 @@ import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
@ -47,6 +48,7 @@ import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@Ignore
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = DEFINED_PORT)
|
||||
public class RedactionIntegrationTest {
|
||||
|
||||
@ -120,13 +120,13 @@ public class EntityRedactionServiceTest {
|
||||
"rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" +
|
||||
" when\n" +
|
||||
" Section(tabularData != null && tabularData.size() > 0\n" +
|
||||
" && tabularData.containsKey(\"Vertebrate\\nstudy Y/N\")\n" +
|
||||
" && tabularData.get(\"Vertebrate\\nstudy Y/N\").equals(\"Y\")\n" +
|
||||
" && tabularData.containsKey(\"Vertebrate study Y/N\")\n" +
|
||||
" && tabularData.get(\"Vertebrate study Y/N\").equals(\"Y\")\n" +
|
||||
" )\n" +
|
||||
" then\n" +
|
||||
" section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" +
|
||||
" section.highlightCell(\"Vertebrate\\nstudy Y/N\", 9);\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9);\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion()).thenReturn(1L);
|
||||
when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules));
|
||||
|
||||
@ -102,11 +102,11 @@ rule "8: Redact contact information, if Producer is found"
|
||||
rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study"
|
||||
when
|
||||
Section(tabularData != null && tabularData.size() > 0
|
||||
&& tabularData.containsKey("Vertebrate\nstudy Y/N")
|
||||
&& tabularData.get("Vertebrate\nstudy Y/N").equals("Y")
|
||||
&& tabularData.containsKey("Vertebrate study Y/N")
|
||||
&& tabularData.get("Vertebrate study Y/N").equals("Y")
|
||||
)
|
||||
then
|
||||
section.redact("name", 9, "Redacted because row is a vertebrate study");
|
||||
section.redact("address", 9, "Redacted because rows is a vertebrate study");
|
||||
section.highlightCell("Vertebrate\nstudy Y/N", 9);
|
||||
section.highlightCell("Vertebrate study Y/N", 9);
|
||||
end
|
||||
Loading…
x
Reference in New Issue
Block a user