Pull request #126: First steps for incremental analysis
Merge in RED/redaction-service from incrementAnalysis to master * commit '511092b9e76d58af33dfff90c1133b92e850d47c': First steps for incremental analysis
This commit is contained in:
commit
9db74628a4
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Result of a reanalysis run. It carries a single value: the redaction log.
 * Getters, setters, builder and both constructors are generated by Lombok.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ReanalyzeResult {
|
||||
|
||||
// Redaction log returned to the caller of the reanalyze endpoint.
private RedactionLog redactionLog;
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Request body of the reanalyze endpoint: the document together with the outcome of the
 * previous analysis (section text and redaction log) and the current manual redactions.
 * <p>
 * NOTE(review): the class name is spelled "Renalyze" (missing "a"); it is public API, so
 * correcting it means renaming it in every caller as well.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RenalyzeRequest {
|
||||
|
||||
// Raw document bytes; ReanalyzeService loads them with PDDocument.load, i.e. as a PDF.
private byte[] document;
|
||||
// Rule set whose dictionary and rules are applied.
private String ruleSetId;
|
||||
// Manual redactions to take into account; ReanalyzeService treats null as "none".
private ManualRedactions manualRedactions;
|
||||
// Section texts and section areas of the document, as produced by the analyze endpoint.
private Text text;
|
||||
// Previous redaction log; its dictionary version is the baseline for the dictionary increment.
private RedactionLog redactionLog;
|
||||
// NOTE(review): not read anywhere in the code visible here - confirm its intended use.
private OffsetDateTime lastProcessed;
|
||||
}
|
||||
@ -26,4 +26,8 @@ public class SectionArea {
|
||||
|
||||
private String header;
|
||||
|
||||
public boolean contains(Rectangle other) {
|
||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -4,8 +4,11 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
||||
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
@ -21,6 +24,9 @@ public interface RedactionResource {
|
||||
/**
 * POST {@code /analyze}: consumes and produces JSON.
 *
 * @param analyzeRequest request body
 * @return the analysis result
 */
@PostMapping(value = "/analyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest);
|
||||
|
||||
/**
 * POST {@code /reanalyze}: consumes and produces JSON.
 *
 * @param renalyzeRequest request body (document, previous text and redaction log, manual redactions)
 * @return the result holding the redaction log
 */
@PostMapping(value = "/reanalyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest);
|
||||
|
||||
/**
 * POST {@code /annotate}: consumes and produces JSON.
 *
 * @param annotateRequest request body
 * @return the annotate response
 */
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
<version>2.0.0</version>
|
||||
<version>2.2.9</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
|
||||
@ -4,10 +4,12 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
||||
@ -18,13 +20,16 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationSer
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
@ -47,6 +52,7 @@ public class RedactionController implements RedactionResource {
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final DictionaryService dictionaryService;
|
||||
private final AnnotationService annotationService;
|
||||
private final ReanalyzeService reanalyzeService;
|
||||
|
||||
|
||||
@Override
|
||||
@ -68,7 +74,7 @@ public class RedactionController implements RedactionResource {
|
||||
return AnalyzeResult.builder()
|
||||
.sectionGrid(classifiedDoc.getSectionGrid())
|
||||
.redactionLog(new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
|
||||
.getRulesVersion(), analyzeRequest.getRuleSetId()))
|
||||
.getRulesVersion(), analyzeRequest.getRuleSetId()))
|
||||
.numberOfPages(classifiedDoc.getPages().size())
|
||||
.text(new Text(classifiedDoc.getSectionText()))
|
||||
.build();
|
||||
@ -80,6 +86,12 @@ public class RedactionController implements RedactionResource {
|
||||
}
|
||||
|
||||
|
||||
public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
|
||||
|
||||
return reanalyzeService.reanalyze(renalyzeRequest);
|
||||
}
|
||||
|
||||
|
||||
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(annotateRequest.getDocument()))) {
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.text.PDFTextStripperByArea;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
|
||||
@Getter
|
||||
private List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
||||
|
||||
@Setter
|
||||
private int pageNumber;
|
||||
|
||||
public PDFAreaTextStripper() throws IOException {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
|
||||
int startIndex = 0;
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0"))) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
super.writeString(text);
|
||||
}
|
||||
|
||||
|
||||
public void clearPositions(){
|
||||
textPositionSequences = new ArrayList<>();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,15 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Pairs a set of dictionary values with a dictionary version.
 * DictionaryService#getDictionaryIncrements fills it with the values of all entries newer
 * than a given version and the version it obtained from updateDictionary.
 */
@Data
|
||||
@AllArgsConstructor
|
||||
public class DictionaryIncrement {
|
||||
|
||||
// Values of the dictionary entries that belong to this increment.
private Set<String> values;
|
||||
// Dictionary version the increment was computed against.
private long dictionaryVersion;
|
||||
|
||||
}
|
||||
@ -3,6 +3,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
@ -17,11 +20,12 @@ public class DictionaryModel implements Serializable {
|
||||
private boolean caseInsensitive;
|
||||
private boolean hint;
|
||||
private boolean recommendation;
|
||||
private Set<String> entries;
|
||||
private Set<DictionaryEntry> entries;
|
||||
private Set<String> localEntries;
|
||||
|
||||
public Set<String> getValues(boolean local){
|
||||
return local ? localEntries : entries;
|
||||
return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e-> e.getValue()).collect(Collectors
|
||||
.toSet());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class ReanalysisSection {
|
||||
|
||||
private int sectionNumber;
|
||||
private String headline;
|
||||
private List<TextBlock> textBlocks;
|
||||
private Map<String, CellValue> tabularData = new HashMap<>();
|
||||
private List<Integer> cellStarts;
|
||||
|
||||
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
SearchableText searchableText = new SearchableText();
|
||||
textBlocks.forEach(block -> {
|
||||
if (block instanceof TextBlock) {
|
||||
searchableText.addAll(block.getSequences());
|
||||
}
|
||||
});
|
||||
return searchableText;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,42 +1,45 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||
import feign.FeignException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||
|
||||
import feign.FeignException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DictionaryService {
|
||||
|
||||
|
||||
private final DictionaryClient dictionaryClient;
|
||||
|
||||
|
||||
private Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
|
||||
|
||||
|
||||
public void updateDictionary(String ruleSetId) {
|
||||
public long updateDictionary(String ruleSetId) {
|
||||
|
||||
long version = dictionaryClient.getVersion(ruleSetId);
|
||||
|
||||
@ -45,6 +48,26 @@ public class DictionaryService {
|
||||
if (foundDictionary == null || version > foundDictionary.getDictionaryVersion()) {
|
||||
updateDictionaryEntry(ruleSetId, version);
|
||||
}
|
||||
|
||||
return version;
|
||||
}
|
||||
|
||||
|
||||
public DictionaryIncrement getDictionaryIncrements(String ruleSetId, long fromVersion) {
|
||||
|
||||
long version = updateDictionary(ruleSetId);
|
||||
|
||||
Set<String> newValues = new HashSet<>();
|
||||
List<DictionaryModel> dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary();
|
||||
dictionaryModels.forEach(dictionaryModel -> {
|
||||
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion) {
|
||||
newValues.add(dictionaryEntry.getValue());
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return new DictionaryIncrement(newValues, version);
|
||||
}
|
||||
|
||||
|
||||
@ -63,7 +86,6 @@ public class DictionaryService {
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
|
||||
|
||||
Colors colors = dictionaryClient.getColors(ruleSetId);
|
||||
@ -86,6 +108,7 @@ public class DictionaryService {
|
||||
|
||||
|
||||
public void updateExternalDictionary(Dictionary dictionary, String ruleSetId) {
|
||||
|
||||
dictionary.getDictionaryModels().forEach(dm -> {
|
||||
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
|
||||
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false);
|
||||
@ -98,17 +121,15 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
private Set<String> convertEntries(TypeResult t) {
|
||||
private Set<DictionaryEntry> convertEntries(TypeResult t) {
|
||||
|
||||
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
|
||||
.getEntries());
|
||||
|
||||
if (t.isCaseInsensitive()) {
|
||||
return dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId())
|
||||
.getEntries()
|
||||
.stream()
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
} else {
|
||||
return new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId()).getEntries());
|
||||
entries.forEach(entry -> entry.getValue().toLowerCase(Locale.ROOT));
|
||||
}
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
@ -148,6 +169,7 @@ public class DictionaryService {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean isRecommendation(String type, String ruleSetId) {
|
||||
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
@ -159,6 +181,7 @@ public class DictionaryService {
|
||||
|
||||
|
||||
public Dictionary getDeepCopyDictionary(String ruleSetId) {
|
||||
|
||||
List<DictionaryModel> copy = new ArrayList<>();
|
||||
|
||||
var representation = dictionariesByRuleSets.get(ruleSetId);
|
||||
@ -170,15 +193,22 @@ public class DictionaryService {
|
||||
return new Dictionary(copy, representation.getDictionaryVersion());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Looks up the cached dictionary representation of the rule set and returns a color from it.
 * There is no null check: a rule set that has not been cached fails with a NullPointerException.
 * <p>
 * NOTE(review): despite the method name this returns the request-ADD color
 * (getRequestAddColor), exactly like getRequestAddColor(String) does. This looks like a
 * copy/paste slip - confirm whether the representation has a separate remove color to return here.
 *
 * @param ruleSetId rule set to look up
 * @return the color read from the representation
 */
public float[] getRequestRemoveColor(String ruleSetId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns the "not redacted" color of the cached dictionary representation of the rule set.
 * There is no null check: a rule set that has not been cached fails with a NullPointerException.
 *
 * @param ruleSetId rule set to look up
 * @return the not-redacted color
 */
public float[] getNotRedactedColor(String ruleSetId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getNotRedactedColor();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Returns the "request add" color of the cached dictionary representation of the rule set.
 * There is no null check: a rule set that has not been cached fails with a NullPointerException.
 *
 * @param ruleSetId rule set to look up
 * @return the request-add color
 */
public float[] getRequestAddColor(String ruleSetId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
}
|
||||
|
||||
}
|
||||
@ -349,7 +349,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
public Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
@ -0,0 +1,281 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ReanalyzeService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||
|
||||
|
||||
public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), renalyzeRequest
|
||||
.getRedactionLog()
|
||||
.getDictionaryVersion());
|
||||
|
||||
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(renalyzeRequest.getManualRedactions());
|
||||
Map<String, List<Comment>> comments = null;
|
||||
Set<ManualRedactionEntry> manualAdds = null;
|
||||
|
||||
if (renalyzeRequest.getManualRedactions() != null) {
|
||||
// TODO comments will be removed from redactionLog, so we ignore this first.
|
||||
comments = renalyzeRequest.getManualRedactions().getComments();
|
||||
manualAdds = renalyzeRequest.getManualRedactions().getEntriesToAdd();
|
||||
}
|
||||
|
||||
Set<Integer> sectionsToReanaylse = new HashSet<>();
|
||||
for (RedactionLogEntry entry : renalyzeRequest.getRedactionLog().getRedactionLogEntry()) {
|
||||
if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
|
||||
sectionsToReanaylse.add(entry.getSectionNumber());
|
||||
}
|
||||
}
|
||||
|
||||
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), dictionaryIncrement.getValues(), "find", sectionText
|
||||
.getHeadline(), sectionText.getSectionNumber(), false);
|
||||
if (!entities.isEmpty()) {
|
||||
sectionsToReanaylse.add(sectionText.getSectionNumber());
|
||||
}
|
||||
|
||||
if (manualAdds != null) {
|
||||
for (SectionArea sectionArea : sectionText.getSectionAreas()) {
|
||||
for (ManualRedactionEntry manualAdd : manualAdds) {
|
||||
for (Rectangle manualPosition : manualAdd.getPositions()) {
|
||||
if (sectionArea.contains(manualPosition)) {
|
||||
manualAdd.setSection(sectionText.getHeadline());
|
||||
manualAdd.setSectionNumber(sectionText.getSectionNumber());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sectionsToReanaylse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
|
||||
renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
||||
return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
|
||||
}
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(renalyzeRequest.getDocument()))) {
|
||||
|
||||
List<ReanalysisSection> reanalysisSections = new ArrayList<>();
|
||||
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
|
||||
|
||||
if (!sectionsToReanaylse.contains(sectionText.getSectionNumber())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ReanalysisSection reanalysisSection = new ReanalysisSection();
|
||||
reanalysisSection.setHeadline(sectionText.getHeadline());
|
||||
reanalysisSection.setSectionNumber(sectionText.getSectionNumber());
|
||||
List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
Map<Integer, List<SectionArea>> sectionAreasPerPage = new HashMap<>();
|
||||
for (SectionArea sectionArea : sectionText.getSectionAreas()) {
|
||||
sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>())
|
||||
.add(sectionArea);
|
||||
}
|
||||
|
||||
Map<String, CellValue> tabularData = new HashMap<>();
|
||||
List<Integer> cellStarts = new ArrayList<>();
|
||||
for (Integer page : sectionAreasPerPage.keySet()) {
|
||||
List<SectionArea> areasOnPage = sectionAreasPerPage.get(page);
|
||||
|
||||
PDPage pdPage = pdDocument.getPage(page - 1);
|
||||
PDRectangle cropBox = pdPage.getCropBox();
|
||||
PDFAreaTextStripper textStripper = new PDFAreaTextStripper();
|
||||
textStripper.setPageNumber(page);
|
||||
|
||||
int cellStart = 0;
|
||||
for (SectionArea sectionArea : areasOnPage) {
|
||||
|
||||
Rectangle2D rect = null;
|
||||
if (pdPage.getRotation() == 90) {
|
||||
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft()
|
||||
.getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f);
|
||||
} else {
|
||||
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft()
|
||||
.getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea
|
||||
.getHeight() + 0.001f);
|
||||
}
|
||||
|
||||
textStripper.addRegion(String.valueOf(1), rect);
|
||||
textStripper.extractRegions(pdPage);
|
||||
textStripper.getTextForRegion(String.valueOf(1));
|
||||
List<TextPositionSequence> positions = textStripper.getTextPositionSequences();
|
||||
|
||||
TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft()
|
||||
.getX() + sectionArea.getWidth(), sectionArea.getTopLeft()
|
||||
.getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0);
|
||||
|
||||
if (sectionText.isTable()) {
|
||||
Cell cell = new Cell();
|
||||
cell.addTextBlock(textBlock);
|
||||
tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart));
|
||||
cellStarts.add(cellStart);
|
||||
cellStart = cellStart + cell.toString().trim().length() + 1;
|
||||
}
|
||||
|
||||
textBlocks.add(textBlock);
|
||||
textStripper.clearPositions();
|
||||
}
|
||||
|
||||
}
|
||||
reanalysisSection.setTextBlocks(textBlocks);
|
||||
reanalysisSection.setTabularData(tabularData);
|
||||
reanalysisSections.add(reanalysisSection);
|
||||
if (sectionText.isTable()) {
|
||||
reanalysisSection.setCellStarts(cellStarts);
|
||||
}
|
||||
}
|
||||
|
||||
//--
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(renalyzeRequest.getRuleSetId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(renalyzeRequest.getRuleSetId());
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (ReanalysisSection reanalysisSection : reanalysisSections) {
|
||||
|
||||
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
|
||||
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
|
||||
if (reanalysisSection.getCellStarts() != null) {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(entities)
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
.headline(reanalysisSection.getHeadline())
|
||||
.sectionNumber(reanalysisSection.getSectionNumber())
|
||||
.tabularData(reanalysisSection.getTabularData())
|
||||
.searchableText(reanalysisSection.getSearchableText())
|
||||
.dictionary(dictionary)
|
||||
.build(), reanalysisSection.getSearchableText()));
|
||||
}
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair
|
||||
.getSection());
|
||||
entities.addAll(analysedRowSection.getEntities());
|
||||
});
|
||||
|
||||
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
|
||||
for (Entity entity : entities) {
|
||||
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
|
||||
.add(entityPositionSequence);
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
|
||||
.getStart(), entity.getEnd()));
|
||||
}
|
||||
}
|
||||
|
||||
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
||||
for (int page = 1; page <= pdDocument.getNumberOfPages(); page++) {
|
||||
if (entitiesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest
|
||||
.getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
|
||||
}
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, renalyzeRequest
|
||||
.getRuleSetId()));
|
||||
}
|
||||
|
||||
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
|
||||
while (itty.hasNext()) {
|
||||
RedactionLogEntry entry = itty.next();
|
||||
if (sectionsToReanaylse.contains(entry.getSectionNumber()) || entry.getSectionNumber() == 0) {
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
|
||||
renalyzeRequest.getRedactionLog().getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
||||
|
||||
return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Set<String> getForceAndRemoveIds(ManualRedactions manualRedactions) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getIdsToRemove()
|
||||
.stream()
|
||||
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ -55,11 +56,11 @@ public class RedactionLogCreatorService {
|
||||
addSectionGrid(classifiedDoc, page);
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) != null) {
|
||||
addEntries(classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
classifiedDoc.getRedactionLogEntities().addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
|
||||
}
|
||||
|
||||
if (manualRedactionPages.contains(page)) {
|
||||
addManualEntries(classifiedDoc, manualRedactions, page, ruleSetId);
|
||||
classifiedDoc.getRedactionLogEntities().addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
|
||||
}
|
||||
|
||||
if (!classifiedDoc.getPages().get(page - 1).getImageBounds().isEmpty()) {
|
||||
@ -106,13 +107,15 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private void addEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page, String ruleSetId) {
|
||||
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions, int page, String ruleSetId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
// Duplicates can exist due to table extraction columns over multiple rows.
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
|
||||
entityLoop:
|
||||
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
||||
for (Entity entity : entities.get(page)) {
|
||||
|
||||
List<Comment> comments = null;
|
||||
|
||||
@ -201,10 +204,12 @@ public class RedactionLogCreatorService {
|
||||
|
||||
// FIXME ids should never be null. Figure out why this happens.
|
||||
if (redactionLogEntry.getId() != null) {
|
||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||
redactionLogEntities.add(redactionLogEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return redactionLogEntities;
|
||||
}
|
||||
|
||||
|
||||
@ -233,14 +238,16 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private void addManualEntries(Document classifiedDoc, ManualRedactions manualRedactions, int page,
|
||||
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds, Map<String, List<Comment>> comments, int page,
|
||||
String ruleSetId) {
|
||||
|
||||
if (manualRedactions == null) {
|
||||
return;
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
if (manualAdds == null) {
|
||||
return redactionLogEntities;
|
||||
}
|
||||
|
||||
for (ManualRedactionEntry manualRedactionEntry : manualRedactions.getEntriesToAdd()) {
|
||||
for (ManualRedactionEntry manualRedactionEntry : manualAdds) {
|
||||
|
||||
String id = manualRedactionEntry.getId();
|
||||
|
||||
@ -254,11 +261,13 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
}
|
||||
|
||||
redactionLogEntry.setComments(manualRedactions.getComments().get(id));
|
||||
redactionLogEntry.setComments(comments.get(id));
|
||||
if (!rectanglesOnPage.isEmpty() && !approvedAndShouldBeInDictionary(manualRedactionEntry)) {
|
||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||
redactionLogEntities.add(redactionLogEntry);
|
||||
}
|
||||
}
|
||||
|
||||
return redactionLogEntities;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -11,10 +11,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@NoArgsConstructor
|
||||
public class Cell extends Rectangle {
|
||||
|
||||
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
@ -4,8 +4,12 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
@ -17,6 +21,7 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -24,7 +29,15 @@ import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.text.PDFTextStripperByArea;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.kie.api.KieServices;
|
||||
@ -32,6 +45,7 @@ import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
@ -42,6 +56,7 @@ import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
@ -56,17 +71,28 @@ import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||
import com.iqser.red.service.redaction.v1.model.Point;
|
||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.model.Status;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = RANDOM_PORT)
|
||||
@ -112,6 +138,7 @@ public class RedactionIntegrationTest {
|
||||
private final Map<String, Boolean> recommendationTypeMap = new HashMap<>();
|
||||
private final Map<String, Integer> rankTypeMap = new HashMap<>();
|
||||
private final Colors colors = new Colors();
|
||||
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||
|
||||
private final static String TEST_RULESET_ID = "123";
|
||||
|
||||
@ -376,7 +403,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
return DictionaryResponse.builder()
|
||||
.hexColor(typeColorMap.get(type))
|
||||
.entries(dictionary.get(type))
|
||||
.entries(toDictionaryEntry(dictionary.get(type)))
|
||||
.isHint(hintTypeMap.get(type))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(type))
|
||||
.isRecommendation(recommendationTypeMap.get(type))
|
||||
@ -385,6 +412,15 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries){
|
||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||
entries.forEach(entry -> {
|
||||
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
|
||||
});
|
||||
return dictionaryEntries;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
|
||||
|
||||
@ -414,6 +450,22 @@ public class RedactionIntegrationTest {
|
||||
assertThat(entry.getValue().size()).isEqualTo(1);
|
||||
});
|
||||
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
||||
|
||||
long rstart = System.currentTimeMillis();
|
||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
||||
.redactionLog(result.getRedactionLog())
|
||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
||||
.manualRedactions(null)
|
||||
.text(result.getText())
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.build());
|
||||
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
|
||||
}
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
@ -455,6 +507,86 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("first analysis duration: " + (end - start));
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
|
||||
fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
|
||||
}
|
||||
|
||||
int correctFound = 0;
|
||||
loop:
|
||||
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
|
||||
for (SectionText sectionText : result.getText().getSectionTexts()) {
|
||||
if (redactionLogEntry.getType().equals("image")) {
|
||||
correctFound++;
|
||||
continue loop;
|
||||
}
|
||||
if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
|
||||
String value = sectionText.getText()
|
||||
.substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
|
||||
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
|
||||
correctFound++;
|
||||
} else {
|
||||
throw new RuntimeException("WTF");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
|
||||
|
||||
|
||||
dictionary.get(AUTHOR).add("properties");
|
||||
reanlysisVersions.put("properties", 1L);
|
||||
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(2L);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
||||
.redactionLog(result.getRedactionLog())
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.manualRedactions(null)
|
||||
.text(result.getText())
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.build());
|
||||
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (end - start));
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.redactionLog(reanalyzeResult.getRedactionLog())
|
||||
.sectionGrid(result.getSectionGrid())
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void fillRecanTest() throws IOException {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S5.pdf");
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.redactionLog(result.getRedactionLog())
|
||||
@ -496,9 +628,70 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
|
||||
SectionArea sectionArea = result.getText().getSectionTexts().get(3).getSectionAreas().get(5);
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(IOUtils.toByteArray(pdfFileResource.getInputStream())))) {
|
||||
|
||||
PDPage docPage = pdDocument.getPage(0);
|
||||
|
||||
PDFTextStripperByArea textStripper = new PDFTextStripperByArea();
|
||||
|
||||
PDRectangle cropBox = docPage.getCropBox();
|
||||
PDRectangle mediaBox = docPage.getMediaBox();
|
||||
|
||||
|
||||
// if (textPositions.get(0).getRotation() == 90) {
|
||||
// posXEnd = textPositions.get(0).getYDirAdj() + 2;
|
||||
// posYInit = getY1();
|
||||
// posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
|
||||
// } else {
|
||||
// posXEnd = textPositions.get(textPositions.size() - 1)
|
||||
// .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
|
||||
// posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
|
||||
// posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
|
||||
// .getYDirAdj() + 2;
|
||||
// }
|
||||
|
||||
|
||||
Rectangle2D rect = new Rectangle2D.Float(sectionArea.getTopLeft()
|
||||
.getY(), sectionArea.getTopLeft()
|
||||
.getX() , sectionArea.getHeight(), sectionArea
|
||||
.getWidth() + 0.001f);
|
||||
|
||||
textStripper.addRegion("region", rect);
|
||||
|
||||
|
||||
|
||||
textStripper.extractRegions(docPage);
|
||||
|
||||
String textForRegion = textStripper.getTextForRegion("region");
|
||||
|
||||
System.out.println(textForRegion);
|
||||
|
||||
// fill a rectangle
|
||||
PDPageContentStream contents = new PDPageContentStream (pdDocument, docPage, PDPageContentStream.AppendMode.APPEND, false, false);
|
||||
contents.setNonStrokingColor (Color.RED);
|
||||
contents.addRect (sectionArea.getTopLeft().getX(), sectionArea.getTopLeft().getY(), sectionArea.getWidth(), sectionArea.getHeight());
|
||||
contents.fill ();
|
||||
contents.close ();
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
pdDocument.save(byteArrayOutputStream);
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated2.pdf")) {
|
||||
fileOutputStream.write(byteArrayOutputStream.toByteArray());
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableRedaction() throws IOException {
|
||||
|
||||
@ -569,7 +762,7 @@ public class RedactionIntegrationTest {
|
||||
manualRedactionEntry.setReason("Manual Redaction");
|
||||
manualRedactionEntry.setPositions(List.of(new Rectangle(new Point(375.61096f, 241.282f), 7.648041f, 43.72262f, 1), new Rectangle(new Point(384.83517f, 241.282f), 7.648041f, 17.043358f, 1)));
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
@ -579,9 +772,25 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeResult result = redactionController.analyze(request);
|
||||
|
||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
|
||||
.id("5b940b2cb401ed9f5be6fc24f6e77bcf")
|
||||
.status(Status.APPROVED)
|
||||
.build()));
|
||||
|
||||
|
||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
||||
.redactionLog(result.getRedactionLog())
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.manualRedactions(manualRedactions)
|
||||
.text(result.getText())
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.build());
|
||||
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.redactionLog(result.getRedactionLog())
|
||||
.redactionLog(reanalyzeResult.getRedactionLog())
|
||||
.sectionGrid(result.getSectionGrid())
|
||||
.build());
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
@ -129,12 +130,12 @@ public class EntityRedactionServiceTest {
|
||||
.build();
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
@ -162,12 +163,12 @@ public class EntityRedactionServiceTest {
|
||||
.build();
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA"))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
@ -191,11 +192,11 @@ public class EntityRedactionServiceTest {
|
||||
" Supplement - Identity of the active substance - Reference list.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
@ -228,15 +229,15 @@ public class EntityRedactionServiceTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
@ -297,11 +298,11 @@ public class EntityRedactionServiceTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
@ -346,7 +347,7 @@ public class EntityRedactionServiceTest {
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt")))
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
@ -367,13 +368,13 @@ public class EntityRedactionServiceTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Bissig R.", "Thanei P."))
|
||||
.entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
@ -392,13 +393,13 @@ public class EntityRedactionServiceTest {
|
||||
pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf");
|
||||
|
||||
dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C."))
|
||||
.entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
@ -419,13 +420,13 @@ public class EntityRedactionServiceTest {
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
|
||||
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Aldershof S."))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland"))
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
|
||||
|
||||
@ -517,4 +518,12 @@ public class EntityRedactionServiceTest {
|
||||
}
|
||||
}
|
||||
|
||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries){
|
||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||
entries.forEach(entry -> {
|
||||
dictionaryEntries.add(new DictionaryEntry(entry, 1L, false));
|
||||
});
|
||||
return dictionaryEntries;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user