diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ReanalyzeResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ReanalyzeResult.java
new file mode 100644
index 00000000..963f3d70
--- /dev/null
+++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ReanalyzeResult.java
@@ -0,0 +1,15 @@
+package com.iqser.red.service.redaction.v1.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Response body of the {@code /reanalyze} endpoint.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ReanalyzeResult {
+
+ // Redaction log returned to the caller after the re-analysis.
+ private RedactionLog redactionLog;
+}
diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java
new file mode 100644
index 00000000..e11fee5d
--- /dev/null
+++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java
@@ -0,0 +1,22 @@
+package com.iqser.red.service.redaction.v1.model;
+
+import java.time.OffsetDateTime;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Request body of the {@code /reanalyze} endpoint.
+ *
+ * NOTE(review): the class name is missing an "a" ("Renalyze"); it is part of the public API, so renaming
+ * needs a coordinated change.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class RenalyzeRequest {
+
+ // Raw bytes of the document to re-analyse.
+ private byte[] document;
+ // Identifier of the rule set whose dictionary and rules are applied.
+ private String ruleSetId;
+ // Manual redactions supplied with the request; may be absent.
+ private ManualRedactions manualRedactions;
+ // Section texts (with their areas) of the document.
+ private Text text;
+ // Redaction log to be updated by the re-analysis.
+ private RedactionLog redactionLog;
+ // presumably the time of the previous analysis run — TODO confirm against the caller
+ private OffsetDateTime lastProcessed;
+}
diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java
index 0944bcd0..3e02dce8 100644
--- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java
+++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java
@@ -26,4 +26,8 @@ public class SectionArea {
private String header;
+    /**
+     * Checks whether the given rectangle lies completely inside this section area.
+     *
+     * @param other rectangle to test
+     * @return {@code true} if {@code other} is on the same page and its horizontal and vertical extent
+     *         (top-left corner plus width/height) does not exceed this area's extent; edges may coincide
+     */
+    public boolean contains(Rectangle other) {
+
+        if (page != other.getPage()) {
+            return false;
+        }
+        // Horizontal extent: left edge first, then right edge.
+        if (!(this.topLeft.getX() <= other.getTopLeft().getX())) {
+            return false;
+        }
+        if (!(this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth())) {
+            return false;
+        }
+        // Vertical extent: same test along the y axis.
+        if (!(this.getTopLeft().getY() <= other.getTopLeft().getY())) {
+            return false;
+        }
+        return this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
+    }
+
}
diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java
index 8dfc4ebb..b58dcd9c 100644
--- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java
+++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java
@@ -4,8 +4,11 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
+import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
+
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
@@ -21,6 +24,9 @@ public interface RedactionResource {
@PostMapping(value = "/analyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest);
+ /**
+  * Re-analyses a document; accepts and produces JSON via {@code POST /reanalyze}.
+  *
+  * @param renalyzeRequest request body
+  * @return result of the re-analysis
+  */
+ @PostMapping(value = "/reanalyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
+ ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest);
+
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml
index 6e8610fa..fba190d6 100644
--- a/redaction-service-v1/redaction-service-server-v1/pom.xml
+++ b/redaction-service-v1/redaction-service-server-v1/pom.xml
@@ -20,7 +20,7 @@
com.iqser.red.service
configuration-service-api-v1
- 2.0.0
+ 2.2.9
org.drools
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
index f3d41ab2..a9110337 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java
@@ -4,10 +4,12 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
+import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.model.Text;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
@@ -18,13 +20,16 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationSer
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
+import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
+
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
@@ -47,6 +52,7 @@ public class RedactionController implements RedactionResource {
private final DroolsExecutionService droolsExecutionService;
private final DictionaryService dictionaryService;
private final AnnotationService annotationService;
+ private final ReanalyzeService reanalyzeService;
@Override
@@ -68,7 +74,7 @@ public class RedactionController implements RedactionResource {
return AnalyzeResult.builder()
.sectionGrid(classifiedDoc.getSectionGrid())
.redactionLog(new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
- .getRulesVersion(), analyzeRequest.getRuleSetId()))
+ .getRulesVersion(), analyzeRequest.getRuleSetId()))
.numberOfPages(classifiedDoc.getPages().size())
.text(new Text(classifiedDoc.getSectionText()))
.build();
@@ -80,6 +86,12 @@ public class RedactionController implements RedactionResource {
}
+    /**
+     * Delegates the re-analysis request to the {@code ReanalyzeService}.
+     *
+     * @param renalyzeRequest request body
+     * @return the result produced by the service
+     */
+    @Override
+    public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
+
+        return reanalyzeService.reanalyze(renalyzeRequest);
+    }
+
+
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(annotateRequest.getDocument()))) {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java
new file mode 100644
index 00000000..7e2e56c8
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java
@@ -0,0 +1,83 @@
+package com.iqser.red.service.redaction.v1.server.parsing;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pdfbox.text.PDFTextStripperByArea;
+import org.apache.pdfbox.text.TextPosition;
+
+import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
+
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Area based text stripper that, in addition to the text written by the parent class, records the
+ * text positions it is given as {@link TextPositionSequence}s. Each run of text positions is split
+ * at blanks and wherever the x coordinate jumps backwards.
+ */
+public class PDFAreaTextStripper extends PDFTextStripperByArea {
+
+    private static final String SPACE = " ";
+    private static final String NO_BREAK_SPACE = "\u00A0";
+
+    @Getter
+    private List<TextPositionSequence> textPositionSequences = new ArrayList<>();
+
+    @Setter
+    private int pageNumber;
+
+
+    public PDFAreaTextStripper() throws IOException {
+
+    }
+
+
+    /**
+     * Splits {@code textPositions} into sequences and records them, then forwards {@code text} to the
+     * parent implementation.
+     *
+     * @param text          the text to write
+     * @param textPositions the positions belonging to {@code text}
+     * @throws IOException if the parent implementation fails to write the text
+     */
+    @Override
+    public void writeString(String text, List<TextPosition> textPositions) throws IOException {
+
+        int startIndex = 0;
+        for (int i = 0; i < textPositions.size(); i++) {
+
+            TextPosition current = textPositions.get(i);
+
+            if (i == 0) {
+                // A leading blank is skipped; nothing else can be decided on the first position.
+                if (isBlank(current)) {
+                    startIndex++;
+                }
+                continue;
+            }
+
+            // Strange but sometimes this is happening, for example: Metolachlor2.pdf
+            if (current.getX() < textPositions.get(i - 1).getX()) {
+                addSequence(textPositions.subList(startIndex, i));
+                startIndex = i;
+            }
+
+            // A blank that is not the last position ends the current sequence; the blank itself is dropped.
+            if (isBlank(current) && i <= textPositions.size() - 2) {
+                addSequence(textPositions.subList(startIndex, i));
+                startIndex = i + 1;
+            }
+        }
+
+        // Remaining positions form the last sequence; a trailing blank is cut off first.
+        List<TextPosition> remainder = textPositions.subList(startIndex, textPositions.size());
+        if (!remainder.isEmpty() && isBlank(remainder.get(remainder.size() - 1))) {
+            remainder = remainder.subList(0, remainder.size() - 1);
+        }
+        addSequence(remainder);
+
+        super.writeString(text);
+    }
+
+
+    /**
+     * Replaces the recorded sequences with a new empty list. The previous list instance is left
+     * untouched on purpose, so callers that already fetched it via the getter keep its content.
+     */
+    public void clearPositions() {
+
+        textPositionSequences = new ArrayList<>();
+    }
+
+
+    /** Records the candidate as a sequence unless it is empty or consists of a single blank. */
+    private void addSequence(List<TextPosition> candidate) {
+
+        if (candidate.isEmpty() || candidate.size() == 1 && isBlank(candidate.get(0))) {
+            return;
+        }
+        textPositionSequences.add(new TextPositionSequence(candidate, pageNumber));
+    }
+
+
+    /** Returns whether the position holds exactly a space or a no-break space. */
+    private static boolean isBlank(TextPosition position) {
+
+        String unicode = position.getUnicode();
+        return SPACE.equals(unicode) || NO_BREAK_SPACE.equals(unicode);
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java
new file mode 100644
index 00000000..86362741
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java
@@ -0,0 +1,15 @@
+package com.iqser.red.service.redaction.v1.server.redaction.model;
+
+import java.util.Set;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+/**
+ * Holds a set of dictionary values together with a dictionary version.
+ */
+@Data
+@AllArgsConstructor
+public class DictionaryIncrement {
+
+ // Dictionary values contained in this increment.
+ private Set values;
+ // Dictionary version this increment belongs to.
+ private long dictionaryVersion;
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java
index dd8aecc1..c4b0ce7c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java
@@ -3,6 +3,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.io.Serializable;
import java.util.Set;
+import java.util.stream.Collectors;
+
+import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import lombok.AllArgsConstructor;
import lombok.Data;
@@ -17,11 +20,12 @@ public class DictionaryModel implements Serializable {
private boolean caseInsensitive;
private boolean hint;
private boolean recommendation;
- private Set entries;
+ private Set entries;
private Set localEntries;
public Set getValues(boolean local){
- return local ? localEntries : entries;
+        // Local entries are returned as they are.
+        if (local) {
+            return localEntries;
+        }
+        // Otherwise a new set with the values of all entries not flagged as deleted is built on every call.
+        return entries.stream()
+                .filter(entry -> !entry.isDeleted())
+                .map(DictionaryEntry::getValue)
+                .collect(Collectors.toSet());
}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java
new file mode 100644
index 00000000..df867485
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java
@@ -0,0 +1,34 @@
+package com.iqser.red.service.redaction.v1.server.redaction.model;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
+
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Data of a single section that is analysed again: its number and headline, its text blocks and,
+ * optionally, tabular data and cell start offsets.
+ */
+@Data
+@NoArgsConstructor
+public class ReanalysisSection {
+
+    private int sectionNumber;
+    private String headline;
+    private List textBlocks;
+    private Map tabularData = new HashMap<>();
+    private List cellStarts;
+
+
+    /**
+     * Creates a new {@link SearchableText} on every call and fills it with the sequences of all
+     * {@link TextBlock}s of this section, in list order.
+     *
+     * @return the combined searchable text
+     */
+    public SearchableText getSearchableText() {
+
+        SearchableText combined = new SearchableText();
+        for (Object block : textBlocks) {
+            if (block instanceof TextBlock) {
+                combined.addAll(((TextBlock) block).getSequences());
+            }
+        }
+        return combined;
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
index 0df2ca2d..e622a013 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java
@@ -1,42 +1,45 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
-import com.iqser.red.service.configuration.v1.api.model.Colors;
-import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResult;
-import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
-import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
-import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
-import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
-import feign.FeignException;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.SerializationUtils;
-import org.springframework.stereotype.Service;
-
import java.awt.Color;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.SerializationUtils;
+import org.springframework.stereotype.Service;
+
+import com.iqser.red.service.configuration.v1.api.model.Colors;
+import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
+import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
+import com.iqser.red.service.configuration.v1.api.model.TypeResult;
+import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
+
+import feign.FeignException;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
@Slf4j
@Service
@RequiredArgsConstructor
public class DictionaryService {
-
private final DictionaryClient dictionaryClient;
-
private Map dictionariesByRuleSets = new HashMap<>();
- public void updateDictionary(String ruleSetId) {
+ public long updateDictionary(String ruleSetId) {
long version = dictionaryClient.getVersion(ruleSetId);
@@ -45,6 +48,26 @@ public class DictionaryService {
if (foundDictionary == null || version > foundDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, version);
}
+
+ return version;
+ }
+
+
+    /**
+     * Calls {@link #updateDictionary(String)} for the rule set and then collects the values of all
+     * dictionary entries whose version is greater than {@code fromVersion}. Entries are not filtered
+     * by their deleted flag here.
+     *
+     * @param ruleSetId   rule set whose dictionary is inspected
+     * @param fromVersion only entries with a version above this value are reported
+     * @return the collected values together with the version returned by {@code updateDictionary}
+     */
+    public DictionaryIncrement getDictionaryIncrements(String ruleSetId, long fromVersion) {
+
+        long latestVersion = updateDictionary(ruleSetId);
+
+        Set<String> changedValues = new HashSet<>();
+        for (DictionaryModel model : dictionariesByRuleSets.get(ruleSetId).getDictionary()) {
+            for (DictionaryEntry entry : model.getEntries()) {
+                if (entry.getVersion() > fromVersion) {
+                    changedValues.add(entry.getValue());
+                }
+            }
+        }
+
+        return new DictionaryIncrement(changedValues, latestVersion);
}
@@ -63,7 +86,6 @@ public class DictionaryService {
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
-
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
Colors colors = dictionaryClient.getColors(ruleSetId);
@@ -86,6 +108,7 @@ public class DictionaryService {
public void updateExternalDictionary(Dictionary dictionary, String ruleSetId) {
+
dictionary.getDictionaryModels().forEach(dm -> {
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false);
@@ -98,17 +121,15 @@ public class DictionaryService {
}
- private Set convertEntries(TypeResult t) {
+    /**
+     * Fetches the dictionary entries of the given type and returns them as a set. For case-insensitive
+     * types the value of every entry is lower-cased before the entry is put into the set, because
+     * {@link String#toLowerCase(Locale)} returns a new string and does not change the entry by itself.
+     *
+     * @param t type whose entries are fetched
+     * @return the entries of the type
+     */
+    private Set<DictionaryEntry> convertEntries(TypeResult t) {
+
+        Set<DictionaryEntry> entries = new HashSet<>();
+        for (DictionaryEntry entry : dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
+                .getEntries()) {
if (t.isCaseInsensitive()) {
- return dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
- .getEntries()
- .stream()
- .map(String::toLowerCase)
- .collect(Collectors.toSet());
- } else {
- return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId()).getEntries());
+                // Store the lower-cased value back; the entry is modified before it is hashed into the set.
+                entry.setValue(entry.getValue().toLowerCase(Locale.ROOT));
}
+            entries.add(entry);
+        }
+        return entries;
}
@@ -148,6 +169,7 @@ public class DictionaryService {
return false;
}
+
public boolean isRecommendation(String type, String ruleSetId) {
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
@@ -159,6 +181,7 @@ public class DictionaryService {
public Dictionary getDeepCopyDictionary(String ruleSetId) {
+
List copy = new ArrayList<>();
var representation = dictionariesByRuleSets.get(ruleSetId);
@@ -170,15 +193,22 @@ public class DictionaryService {
return new Dictionary(copy, representation.getDictionaryVersion());
}
+
public float[] getRequestRemoveColor(String ruleSetId) {
+
+ // NOTE(review): this returns the request-ADD color although the method is named
+ // getRequestRemoveColor — looks like a copy-paste slip; confirm and switch to the remove color.
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
}
+
public float[] getNotRedactedColor(String ruleSetId) {
+
+ // Reads the color from the cached representation of the rule set.
return dictionariesByRuleSets.get(ruleSetId).getNotRedactedColor();
}
+
public float[] getRequestAddColor(String ruleSetId) {
+
+ // Reads the color from the cached representation of the rule set.
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
}
+
}
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
index c8d235c8..07a0a37a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java
@@ -349,7 +349,7 @@ public class EntityRedactionService {
}
- private Set findEntities(SearchableText searchableText, String headline, int sectionNumber,
+ public Set findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local) {
Set found = new HashSet<>();
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java
new file mode 100644
index 00000000..f868490a
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java
@@ -0,0 +1,281 @@
+package com.iqser.red.service.redaction.v1.server.redaction.service;
+
+import java.awt.geom.Rectangle2D;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.kie.api.runtime.KieContainer;
+import org.springframework.stereotype.Service;
+import org.springframework.web.bind.annotation.RequestBody;
+
+import com.iqser.red.service.redaction.v1.model.Comment;
+import com.iqser.red.service.redaction.v1.model.IdRemoval;
+import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
+import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
+import com.iqser.red.service.redaction.v1.model.ManualRedactions;
+import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
+import com.iqser.red.service.redaction.v1.model.Rectangle;
+import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
+import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
+import com.iqser.red.service.redaction.v1.model.SectionArea;
+import com.iqser.red.service.redaction.v1.model.SectionText;
+import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
+import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
+import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
+import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
+import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
+import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
+import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
+import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
+
+import lombok.RequiredArgsConstructor;
+
+@Service
+@RequiredArgsConstructor
+public class ReanalyzeService {
+
+ private final DictionaryService dictionaryService;
+ private final DroolsExecutionService droolsExecutionService;
+ private final SurroundingWordsService surroundingWordsService;
+ private final EntityRedactionService entityRedactionService;
+ private final RedactionLogCreatorService redactionLogCreatorService;
+
+
+ public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
+
+ DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), renalyzeRequest
+ .getRedactionLog()
+ .getDictionaryVersion());
+
+ Set manualForceAndRemoveIds = getForceAndRemoveIds(renalyzeRequest.getManualRedactions());
+ Map> comments = null;
+ Set manualAdds = null;
+
+ if (renalyzeRequest.getManualRedactions() != null) {
+ // TODO comments will be removed from redactionLog, so we ignore this first.
+ comments = renalyzeRequest.getManualRedactions().getComments();
+ manualAdds = renalyzeRequest.getManualRedactions().getEntriesToAdd();
+ }
+
+ Set sectionsToReanaylse = new HashSet<>();
+ for (RedactionLogEntry entry : renalyzeRequest.getRedactionLog().getRedactionLogEntry()) {
+ if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
+ sectionsToReanaylse.add(entry.getSectionNumber());
+ }
+ }
+
+ for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
+ Set entities = EntitySearchUtils.find(sectionText.getText(), dictionaryIncrement.getValues(), "find", sectionText
+ .getHeadline(), sectionText.getSectionNumber(), false);
+ if (!entities.isEmpty()) {
+ sectionsToReanaylse.add(sectionText.getSectionNumber());
+ }
+
+ if (manualAdds != null) {
+ for (SectionArea sectionArea : sectionText.getSectionAreas()) {
+ for (ManualRedactionEntry manualAdd : manualAdds) {
+ for (Rectangle manualPosition : manualAdd.getPositions()) {
+ if (sectionArea.contains(manualPosition)) {
+ manualAdd.setSection(sectionText.getHeadline());
+ manualAdd.setSectionNumber(sectionText.getSectionNumber());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (sectionsToReanaylse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
+ renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
+ return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
+ }
+
+ try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(renalyzeRequest.getDocument()))) {
+
+ List reanalysisSections = new ArrayList<>();
+ for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
+
+ if (!sectionsToReanaylse.contains(sectionText.getSectionNumber())) {
+ continue;
+ }
+
+ ReanalysisSection reanalysisSection = new ReanalysisSection();
+ reanalysisSection.setHeadline(sectionText.getHeadline());
+ reanalysisSection.setSectionNumber(sectionText.getSectionNumber());
+ List textBlocks = new ArrayList<>();
+
+ Map> sectionAreasPerPage = new HashMap<>();
+ for (SectionArea sectionArea : sectionText.getSectionAreas()) {
+ sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>())
+ .add(sectionArea);
+ }
+
+ Map tabularData = new HashMap<>();
+ List cellStarts = new ArrayList<>();
+ for (Integer page : sectionAreasPerPage.keySet()) {
+ List areasOnPage = sectionAreasPerPage.get(page);
+
+ PDPage pdPage = pdDocument.getPage(page - 1);
+ PDRectangle cropBox = pdPage.getCropBox();
+ PDFAreaTextStripper textStripper = new PDFAreaTextStripper();
+ textStripper.setPageNumber(page);
+
+ int cellStart = 0;
+ for (SectionArea sectionArea : areasOnPage) {
+
+ Rectangle2D rect = null;
+ if (pdPage.getRotation() == 90) {
+ rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft()
+ .getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f);
+ } else {
+ rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft()
+ .getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea
+ .getHeight() + 0.001f);
+ }
+
+ textStripper.addRegion(String.valueOf(1), rect);
+ textStripper.extractRegions(pdPage);
+ textStripper.getTextForRegion(String.valueOf(1));
+ List positions = textStripper.getTextPositionSequences();
+
+ TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft()
+ .getX() + sectionArea.getWidth(), sectionArea.getTopLeft()
+ .getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0);
+
+ if (sectionText.isTable()) {
+ Cell cell = new Cell();
+ cell.addTextBlock(textBlock);
+ tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart));
+ cellStarts.add(cellStart);
+ cellStart = cellStart + cell.toString().trim().length() + 1;
+ }
+
+ textBlocks.add(textBlock);
+ textStripper.clearPositions();
+ }
+
+ }
+ reanalysisSection.setTextBlocks(textBlocks);
+ reanalysisSection.setTabularData(tabularData);
+ reanalysisSections.add(reanalysisSection);
+ if (sectionText.isTable()) {
+ reanalysisSection.setCellStarts(cellStarts);
+ }
+ }
+
+ //--
+
+ KieContainer kieContainer = droolsExecutionService.updateRules(renalyzeRequest.getRuleSetId());
+
+ Dictionary dictionary = dictionaryService.getDeepCopyDictionary(renalyzeRequest.getRuleSetId());
+
+ List sectionSearchableTextPairs = new ArrayList<>();
+ for (ReanalysisSection reanalysisSection : reanalysisSections) {
+
+ Set entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
+ .getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
+ if (reanalysisSection.getCellStarts() != null) {
+ surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
+ .getCellStarts());
+ } else {
+ surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
+ }
+
+ sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
+ .isLocal(false)
+ .dictionaryTypes(dictionary.getTypes())
+ .entities(entities)
+ .text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
+ .searchText(reanalysisSection.getSearchableText().toString())
+ .headline(reanalysisSection.getHeadline())
+ .sectionNumber(reanalysisSection.getSectionNumber())
+ .tabularData(reanalysisSection.getTabularData())
+ .searchableText(reanalysisSection.getSearchableText())
+ .dictionary(dictionary)
+ .build(), reanalysisSection.getSearchableText()));
+ }
+
+ Set entities = new HashSet<>();
+ sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
+ Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair
+ .getSection());
+ entities.addAll(analysedRowSection.getEntities());
+ });
+
+ Map> entitiesPerPage = new HashMap<>();
+ for (Entity entity : entities) {
+ Map> sequenceOnPage = new HashMap<>();
+ for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
+ sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
+ .add(entityPositionSequence);
+ }
+
+ for (Map.Entry> entry : sequenceOnPage.entrySet()) {
+ entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
+ .add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
+ .getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
+ .getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
+ .getStart(), entity.getEnd()));
+ }
+ }
+
+ List newRedactionLogEntries = new ArrayList<>();
+ for (int page = 1; page <= pdDocument.getNumberOfPages(); page++) {
+ if (entitiesPerPage.get(page) != null) {
+ newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest
+ .getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
+ }
+ newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, renalyzeRequest
+ .getRuleSetId()));
+ }
+
+ Iterator itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
+ while (itty.hasNext()) {
+ RedactionLogEntry entry = itty.next();
+ if (sectionsToReanaylse.contains(entry.getSectionNumber()) || entry.getSectionNumber() == 0) {
+ itty.remove();
+ }
+ }
+
+ renalyzeRequest.getRedactionLog().getRedactionLogEntry().addAll(newRedactionLogEntries);
+ renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
+
+ return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
+
+ } catch (Exception e) {
+ throw new RedactionException(e);
+ }
+
+ }
+
+
+ private Set getForceAndRemoveIds(ManualRedactions manualRedactions) {
+
+ if (manualRedactions == null) {
+ return new HashSet<>();
+ }
+
+ return Stream.concat(manualRedactions.getIdsToRemove()
+ .stream()
+ .map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))
+ .collect(Collectors.toSet());
+ }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
index afea9472..6b5278ae 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java
@@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@@ -55,11 +56,11 @@ public class RedactionLogCreatorService {
addSectionGrid(classifiedDoc, page);
if (classifiedDoc.getEntities().get(page) != null) {
- addEntries(classifiedDoc, manualRedactions, page, ruleSetId);
+ classifiedDoc.getRedactionLogEntities().addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
}
if (manualRedactionPages.contains(page)) {
- addManualEntries(classifiedDoc, manualRedactions, page, ruleSetId);
+ classifiedDoc.getRedactionLogEntities().addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
}
if (!classifiedDoc.getPages().get(page - 1).getImageBounds().isEmpty()) {
@@ -106,13 +107,15 @@ public class RedactionLogCreatorService {
}
- private void addEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page, String ruleSetId) {
+ public List addEntries(Map> entities, ManualRedactions manualRedactions, int page, String ruleSetId) {
+
+ List redactionLogEntities = new ArrayList<>();
// Duplicates can exist due table extraction colums over multiple rows.
Set processedIds = new HashSet<>();
entityLoop:
- for (Entity entity : classifiedDoc.getEntities().get(page)) {
+ for (Entity entity : entities.get(page)) {
List comments = null;
@@ -201,10 +204,12 @@ public class RedactionLogCreatorService {
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
- classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
+ redactionLogEntities.add(redactionLogEntry);
}
}
}
+
+ return redactionLogEntities;
}
@@ -233,14 +238,16 @@ public class RedactionLogCreatorService {
}
- private void addManualEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page,
+ public List addManualAddEntries(Set manualAdds, Map> comments, int page,
String ruleSetId) {
- if (manualRedactions == null) {
- return;
+ List redactionLogEntities = new ArrayList<>();
+
+ if (manualAdds == null) {
+ return redactionLogEntities;
}
- for (ManualRedactionEntry manualRedactionEntry : manualRedactions.getEntriesToAdd()) {
+ for (ManualRedactionEntry manualRedactionEntry : manualAdds) {
String id = manualRedactionEntry.getId();
@@ -254,11 +261,13 @@ public class RedactionLogCreatorService {
}
}
- redactionLogEntry.setComments(manualRedactions.getComments().get(id));
+ redactionLogEntry.setComments(comments.get(id));
if (!rectanglesOnPage.isEmpty() && !approvedAndShouldBeInDictionary(manualRedactionEntry)) {
- classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
+ redactionLogEntities.add(redactionLogEntry);
}
}
+
+ return redactionLogEntities;
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java
index a7c4fbba..b6efcb3f 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java
@@ -11,10 +11,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
import lombok.Data;
import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
@SuppressWarnings("serial")
@Data
@EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
public class Cell extends Rectangle {
private List textBlocks = new ArrayList<>();
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index 7de575c6..f971651c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -4,8 +4,12 @@ import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
+import java.awt.Color;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Rectangle2D;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
@@ -17,6 +21,7 @@ import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -24,7 +29,15 @@ import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.apache.pdfbox.text.PDFTextStripperByArea;
+import org.apache.pdfbox.util.Matrix;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
@@ -32,6 +45,7 @@ import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
+import org.mockito.MockitoAnnotations;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
@@ -42,6 +56,7 @@ import org.springframework.test.context.junit4.SpringRunner;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.configuration.v1.api.model.Colors;
+import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
@@ -56,17 +71,28 @@ import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
+import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
+import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
+import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.model.SectionText;
import com.iqser.red.service.redaction.v1.model.Status;
+import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
+import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
+import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
+import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
+import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
+import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
+import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = RANDOM_PORT)
@@ -112,6 +138,7 @@ public class RedactionIntegrationTest {
private final Map recommendationTypeMap = new HashMap<>();
private final Map rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
+ private final Map reanlysisVersions = new HashMap<>();
private final static String TEST_RULESET_ID = "123";
@@ -376,7 +403,7 @@ public class RedactionIntegrationTest {
return DictionaryResponse.builder()
.hexColor(typeColorMap.get(type))
- .entries(dictionary.get(type))
+ .entries(toDictionaryEntry(dictionary.get(type)))
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
@@ -385,6 +412,15 @@ public class RedactionIntegrationTest {
}
+ /** Wraps each value in a {@link DictionaryEntry}; the second constructor argument comes from {@code reanlysisVersions} (0 when unmapped). */
+ private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
+
+     return entries.stream()
+             .map(entry -> new DictionaryEntry(entry, reanlysisVersions.getOrDefault(entry, 0L), false))
+             .collect(Collectors.toList());
+ }
+
+
@Test
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
@@ -414,6 +450,22 @@ public class RedactionIntegrationTest {
assertThat(entry.getValue().size()).isEqualTo(1);
});
+
+ dictionary.get(AUTHOR).add("Drinking water");
+ when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
+
+ long rstart = System.currentTimeMillis();
+ ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
+ .redactionLog(result.getRedactionLog())
+ .document(IOUtils.toByteArray(new FileInputStream(path)))
+ .manualRedactions(null)
+ .text(result.getText())
+ .ruleSetId(TEST_RULESET_ID)
+ .build());
+
+ long rend = System.currentTimeMillis();
+ System.out.println("reanalysis analysis duration: " + (rend - rstart));
+
}
long end = System.currentTimeMillis();
@@ -455,6 +507,86 @@ public class RedactionIntegrationTest {
AnalyzeResult result = redactionController.analyze(request);
+ long end = System.currentTimeMillis();
+
+ System.out.println("first analysis duration: " + (end - start));
+
+ try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
+ fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
+ }
+
+ int correctFound = 0;
+ loop:
+ for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
+ for (SectionText sectionText : result.getText().getSectionTexts()) {
+ if (redactionLogEntry.getType().equals("image")) {
+ correctFound++;
+ continue loop;
+ }
+ if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
+ String value = sectionText.getText()
+ .substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
+ if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
+ correctFound++;
+ } else {
+ throw new RuntimeException("WTF");
+ }
+ }
+ }
+ }
+ assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
+
+
+ dictionary.get(AUTHOR).add("properties");
+ reanlysisVersions.put("properties", 1L);
+
+ dictionary.get(AUTHOR).add("physical");
+ reanlysisVersions.put("physical", 2L);
+
+ when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(2L);
+ when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
+
+ start = System.currentTimeMillis();
+ ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
+ .redactionLog(result.getRedactionLog())
+ .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ .manualRedactions(null)
+ .text(result.getText())
+ .ruleSetId(TEST_RULESET_ID)
+ .build());
+
+ end = System.currentTimeMillis();
+ System.out.println("reanalysis analysis duration: " + (end - start));
+
+ AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
+ .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ .redactionLog(reanalyzeResult.getRedactionLog())
+ .sectionGrid(result.getSectionGrid())
+ .build());
+
+ try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
+ fileOutputStream.write(annotateResponse.getDocument());
+ }
+
+ }
+
+
+
+ @Test
+ @Ignore
+ public void fillRecanTest() throws IOException {
+
+ System.out.println("redactionTest");
+ long start = System.currentTimeMillis();
+ ClassPathResource pdfFileResource = new ClassPathResource("files/S5.pdf");
+
+ AnalyzeRequest request = AnalyzeRequest.builder()
+ .ruleSetId(TEST_RULESET_ID)
+ .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ .build();
+
+ AnalyzeResult result = redactionController.analyze(request);
+
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.redactionLog(result.getRedactionLog())
@@ -496,9 +628,70 @@ public class RedactionIntegrationTest {
System.out.println("duration: " + (end - start));
System.out.println("numberOfPages: " + result.getNumberOfPages());
+
+ SectionArea sectionArea = result.getText().getSectionTexts().get(3).getSectionAreas().get(5);
+
+ try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(IOUtils.toByteArray(pdfFileResource.getInputStream())))) {
+
+ PDPage docPage = pdDocument.getPage(0);
+
+ PDFTextStripperByArea textStripper = new PDFTextStripperByArea();
+
+ PDRectangle cropBox = docPage.getCropBox();
+ PDRectangle mediaBox = docPage.getMediaBox();
+
+
+// if (textPositions.get(0).getRotation() == 90) {
+// posXEnd = textPositions.get(0).getYDirAdj() + 2;
+// posYInit = getY1();
+// posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
+// } else {
+// posXEnd = textPositions.get(textPositions.size() - 1)
+// .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
+// posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
+// posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
+// .getYDirAdj() + 2;
+// }
+
+
+ Rectangle2D rect = new Rectangle2D.Float(sectionArea.getTopLeft()
+ .getY(), sectionArea.getTopLeft()
+ .getX() , sectionArea.getHeight(), sectionArea
+ .getWidth() + 0.001f);
+
+ textStripper.addRegion("region", rect);
+
+
+
+ textStripper.extractRegions(docPage);
+
+ String textForRegion = textStripper.getTextForRegion("region");
+
+ System.out.println(textForRegion);
+
+ // fill a rectangle
+ PDPageContentStream contents = new PDPageContentStream (pdDocument, docPage, PDPageContentStream.AppendMode.APPEND, false, false);
+ contents.setNonStrokingColor (Color.RED);
+ contents.addRect (sectionArea.getTopLeft().getX(), sectionArea.getTopLeft().getY(), sectionArea.getWidth(), sectionArea.getHeight());
+ contents.fill ();
+ contents.close ();
+ try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
+ pdDocument.save(byteArrayOutputStream);
+ try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated2.pdf")) {
+ fileOutputStream.write(byteArrayOutputStream.toByteArray());
+ }
+ }
+
+ } catch (Exception e) {
+ throw new RedactionException(e);
+ }
+
}
+
+
+
@Test
public void testTableRedaction() throws IOException {
@@ -569,7 +762,7 @@ public class RedactionIntegrationTest {
manualRedactionEntry.setReason("Manual Redaction");
manualRedactionEntry.setPositions(List.of(new Rectangle(new Point(375.61096f, 241.282f), 7.648041f, 43.72262f, 1), new Rectangle(new Point(384.83517f, 241.282f), 7.648041f, 17.043358f, 1)));
- manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
+// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = AnalyzeRequest.builder()
.ruleSetId(TEST_RULESET_ID)
@@ -579,9 +772,25 @@ public class RedactionIntegrationTest {
AnalyzeResult result = redactionController.analyze(request);
+ manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
+ manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
+ .id("5b940b2cb401ed9f5be6fc24f6e77bcf")
+ .status(Status.APPROVED)
+ .build()));
+
+
+ ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
+ .redactionLog(result.getRedactionLog())
+ .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ .manualRedactions(manualRedactions)
+ .text(result.getText())
+ .ruleSetId(TEST_RULESET_ID)
+ .build());
+
+
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(result.getRedactionLog())
+ .redactionLog(reanalyzeResult.getRedactionLog())
.sectionGrid(result.getSectionGrid())
.build());
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
index fb2fae12..b7efed93 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
@@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.configuration.v1.api.model.Colors;
+import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
@@ -129,12 +130,12 @@ public class EntityRedactionServiceTest {
.build();
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
+ .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
+ .entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
@@ -162,12 +163,12 @@ public class EntityRedactionServiceTest {
.build();
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
+ .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
+ .entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
@@ -191,11 +192,11 @@ public class EntityRedactionServiceTest {
" Supplement - Identity of the active substance - Reference list.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
@@ -228,15 +229,15 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
@@ -297,11 +298,11 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
@@ -346,7 +347,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse);
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
+ .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
@@ -367,13 +368,13 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(Arrays.asList("Bissig R.", "Thanei P."))
+ .entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
+ .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
@@ -392,13 +393,13 @@ public class EntityRedactionServiceTest {
pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf");
dictionaryResponse = DictionaryResponse.builder()
- .entries(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C."))
+ .entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
addressResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
+ .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
@@ -419,13 +420,13 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Aldershof S."))
+ .entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
- .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
+ .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
@@ -517,4 +518,12 @@ public class EntityRedactionServiceTest {
}
}
+ /** Wraps each value in a {@link DictionaryEntry} built with the fixed arguments {@code 1L} and {@code false}. */
+ private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
+     List<DictionaryEntry> dictionaryEntries = new ArrayList<>(entries.size());
+     for (String entry : entries) {
+         dictionaryEntries.add(new DictionaryEntry(entry, 1L, false));
+     }
+     return dictionaryEntries;
+ }
}
\ No newline at end of file