Merge branch 'master' of ssh://git.iqser.com:2222/red/redaction-service into Test
This commit is contained in:
commit
167ae3be94
@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-parent</artifactId>
|
||||
<version>7.1.2</version>
|
||||
<version>7.2.2</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
|
||||
|
||||
@ -21,6 +21,8 @@ import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
|
||||
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
|
||||
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
|
||||
import com.atlassian.bamboo.specs.util.BambooServer;
|
||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
||||
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
|
||||
|
||||
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
|
||||
|
||||
@ -84,23 +86,9 @@ public class PlanSpec {
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Build")
|
||||
.environmentVariables("MAVEN_OPTS="+JVM_ARGS)
|
||||
.inlineBody("#!/bin/bash\n" +
|
||||
"set -e\n" +
|
||||
|
||||
"export MAVEN_OPTS=\"$MAVEN_OPTS "+JVM_ARGS +"\"\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean install -Djava.security.egd=file:/dev/./urandom; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean deploy -e -DdeployAtEnd=true -Dmaven.wagon.http.ssl.insecure=true -Dmaven.wagon.http.ssl.allowall=true -Dmaven.wagon.http.ssl.ignore.validity.dates=true -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases; fi\n" +
|
||||
|
||||
"${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml package\n" +
|
||||
"${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml docker:push\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then echo \"gitTag=${bamboo.planRepository.1.branch}_${bamboo.buildNumber}\" > git.tag; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then echo \"gitTag=${bamboo.version_tag}\" > git.tag; fi\n"),
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
|
||||
.argument(SERVICE_NAME),
|
||||
createJUnitParserTask()
|
||||
.description("Resultparser")
|
||||
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
|
||||
|
||||
51
bamboo-specs/src/main/resources/scripts/build-java.sh
Executable file
51
bamboo-specs/src/main/resources/scripts/build-java.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
SERVICE_NAME=$1
|
||||
|
||||
if [[ "${bamboo_version_tag}" = "dev" ]]
|
||||
then
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
|
||||
--no-transfer-progress \
|
||||
clean install \
|
||||
-Djava.security.egd=file:/dev/./urandomelse
|
||||
else
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
--no-transfer-progress \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
|
||||
versions:set \
|
||||
-DnewVersion=${bamboo_version_tag}
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
--no-transfer-progress \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
|
||||
versions:set \
|
||||
-DnewVersion=${bamboo_version_tag}
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
|
||||
--no-transfer-progress \
|
||||
clean deploy \
|
||||
-e \
|
||||
-DdeployAtEnd=true \
|
||||
-Dmaven.wagon.http.ssl.insecure=true \
|
||||
-Dmaven.wagon.http.ssl.allowall=true \
|
||||
-Dmaven.wagon.http.ssl.ignore.validity.dates=true \
|
||||
-DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases
|
||||
fi
|
||||
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
--no-transfer-progress \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
|
||||
package
|
||||
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
--no-transfer-progress \
|
||||
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
|
||||
docker:push
|
||||
|
||||
if [[ "${bamboo_version_tag}" = "dev" ]]
|
||||
then
|
||||
echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
|
||||
else
|
||||
echo "gitTag=${bamboo_version_tag}" > git.tag
|
||||
fi
|
||||
@ -17,5 +17,16 @@
|
||||
<artifactId>spring-web</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
<version>2.11.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>file-management-service-api-v1</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@ -13,9 +13,9 @@ import java.time.OffsetDateTime;
|
||||
@AllArgsConstructor
|
||||
public class AnalyzeRequest {
|
||||
|
||||
private String projectId;
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String ruleSetId;
|
||||
private String dossierTemplateId;
|
||||
private boolean reanalyseOnlyIfPossible;
|
||||
private ManualRedactions manualRedactions;
|
||||
private OffsetDateTime lastProcessed;
|
||||
|
||||
@ -11,7 +11,7 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class AnalyzeResult {
|
||||
|
||||
private String projectId;
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private long duration;
|
||||
private int numberOfPages;
|
||||
@ -23,6 +23,7 @@ public class AnalyzeResult {
|
||||
private long dictionaryVersion;
|
||||
private long dossierDictionaryVersion;
|
||||
private long rulesVersion;
|
||||
private long legalBasisVersion;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -11,6 +11,7 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class AnnotateRequest {
|
||||
|
||||
private String projectId;
|
||||
private String dossierId;
|
||||
private String dossierTemplateId;
|
||||
private String fileId;
|
||||
}
|
||||
|
||||
@ -15,8 +15,8 @@ public class RedactionChangeLog {
|
||||
private List<RedactionChangeLogEntry> redactionLogEntry = new ArrayList<>();
|
||||
|
||||
private long dictionaryVersion = -1;
|
||||
private long dossierDictionaryVersion = -1;
|
||||
private long rulesVersion = -1;
|
||||
|
||||
private String ruleSetId;
|
||||
private long legalBasisVersion = -1;
|
||||
|
||||
}
|
||||
|
||||
@ -1,32 +1,21 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.LegalBasisMapping;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RedactionLog {
|
||||
|
||||
private List<RedactionLogEntry> redactionLogEntry;
|
||||
private List<LegalBasisMapping> legalBasis;
|
||||
|
||||
private long dictionaryVersion = -1;
|
||||
private long rulesVersion = -1;
|
||||
|
||||
private String ruleSetId;
|
||||
|
||||
private long dossierDictionaryVersion = -1;
|
||||
|
||||
|
||||
public RedactionLog(List<RedactionLogEntry> redactionLogEntry, long dictionaryVersion, long rulesVersion, String ruleSetId, long dossierDictionaryVersion) {
|
||||
|
||||
this.redactionLogEntry = redactionLogEntry;
|
||||
this.dictionaryVersion = dictionaryVersion;
|
||||
this.rulesVersion = rulesVersion;
|
||||
this.ruleSetId = ruleSetId;
|
||||
this.dossierDictionaryVersion = dossierDictionaryVersion;
|
||||
}
|
||||
|
||||
private long rulesVersion = -1;
|
||||
private long legalBasisVersion = -1;
|
||||
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -12,6 +13,7 @@ import java.util.List;
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@EqualsAndHashCode
|
||||
public class RedactionLogEntry {
|
||||
|
||||
private String id;
|
||||
|
||||
@ -11,8 +11,8 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class RedactionRequest {
|
||||
|
||||
private String projectId;
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String ruleSetId;
|
||||
private String dossierTemplateId;
|
||||
private ManualRedactions manualRedactions;
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ public interface RedactionResource {
|
||||
|
||||
String SERVICE_NAME = "redaction-service-v1";
|
||||
|
||||
String RULE_SET_PARAMETER_NAME = "ruleSetId";
|
||||
String RULE_SET_PARAMETER_NAME = "dossierTemplateId";
|
||||
String RULE_SET_PATH_VARIABLE = "/{" + RULE_SET_PARAMETER_NAME + "}";
|
||||
|
||||
|
||||
@ -27,7 +27,7 @@ public interface RedactionResource {
|
||||
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@PostMapping(value = "/rules/update" + RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId);
|
||||
void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String dossierTemplateId);
|
||||
|
||||
@PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
void testRules(@RequestBody String rules);
|
||||
|
||||
@ -21,20 +21,19 @@
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
<version>2.5.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>file-management-service-api-v1</artifactId>
|
||||
<version>2.7.4</version>
|
||||
<version>2.25.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>configuration-service-api-v1</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
@ -57,6 +56,23 @@
|
||||
<artifactId>guava</artifactId>
|
||||
<version>29.0-jre</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>jbig2-imageio</artifactId>
|
||||
<version>3.0.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-core</artifactId>
|
||||
<version>1.4.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-jpeg2000</artifactId>
|
||||
<version>1.4.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- commons -->
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
|
||||
@ -0,0 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
public enum Orientation {
|
||||
|
||||
NONE, LEFT, RIGHT
|
||||
}
|
||||
@ -30,6 +30,8 @@ public class Page {
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
|
||||
private double cropBoxArea;
|
||||
|
||||
|
||||
public boolean isRotated() {
|
||||
|
||||
|
||||
@ -32,6 +32,7 @@ public class TextBlock extends AbstractTextContainer {
|
||||
|
||||
private String classification;
|
||||
|
||||
|
||||
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
|
||||
this.minX = minX;
|
||||
this.maxX = maxX;
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
@ -11,16 +12,21 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class BlockificationService {
|
||||
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
static final float THRESHOLD = 1f;
|
||||
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
|
||||
List<TextPositionSequence> chunkWords = new ArrayList<>();
|
||||
List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();
|
||||
@ -28,21 +34,46 @@ public class BlockificationService {
|
||||
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
|
||||
TextPositionSequence prev = null;
|
||||
|
||||
boolean wasSplitted = false;
|
||||
Float splitX1 = null;
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
||||
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
||||
boolean splittedByRuling = word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || word
|
||||
.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) || word
|
||||
.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || word
|
||||
.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
||||
|
||||
if (prev != null && (lineSeparation || startFromTop || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), verticalRulingLines) || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word
|
||||
.getX1(), word.getY1(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word
|
||||
.getX1(), word.getY2(), verticalRulingLines))) {
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||
|
||||
Orientation prevOrientation = null;
|
||||
if(!chunkBlockList1.isEmpty()) {
|
||||
prevOrientation = chunkBlockList1.get(chunkBlockList1.size() - 1).getOrientation();
|
||||
}
|
||||
|
||||
TextBlock cb1 = buildTextBlock(chunkWords);
|
||||
chunkBlockList1.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
if (splitByX && !splittedByRuling) {
|
||||
wasSplitted = true;
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
splitX1 = word.getX1();
|
||||
} else
|
||||
|
||||
if (newLineAfterSplit && !splittedByRuling) {
|
||||
wasSplitted = false;
|
||||
cb1.setOrientation(Orientation.RIGHT);
|
||||
splitX1 = null;
|
||||
} else
|
||||
|
||||
if(prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)){
|
||||
cb1.setOrientation(Orientation.LEFT);
|
||||
}
|
||||
|
||||
minX = 1000;
|
||||
maxX = 0;
|
||||
minY = 1000;
|
||||
@ -72,9 +103,62 @@ public class BlockificationService {
|
||||
chunkBlockList1.add(cb1);
|
||||
}
|
||||
|
||||
Iterator<AbstractTextContainer> itty = chunkBlockList1.iterator();
|
||||
|
||||
TextBlock previousLeft = null;
|
||||
TextBlock previousRight = null;
|
||||
while (itty.hasNext()) {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if(previousLeft != null && block.getOrientation().equals(Orientation.LEFT)){
|
||||
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()){
|
||||
previousLeft.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if(previousRight != null && block.getOrientation().equals(Orientation.RIGHT)){
|
||||
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()){
|
||||
previousRight.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (block.getOrientation().equals(Orientation.LEFT)) {
|
||||
previousLeft = block;
|
||||
} else if (block.getOrientation().equals(Orientation.RIGHT)) {
|
||||
previousRight = block;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
itty = chunkBlockList1.iterator();
|
||||
TextBlock previous = null;
|
||||
while (itty.hasNext()) {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if(previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous
|
||||
.getMaxY())||
|
||||
previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous
|
||||
.getMaxY())){
|
||||
previous.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
}
|
||||
|
||||
previous = block;
|
||||
}
|
||||
|
||||
|
||||
return new Page(chunkBlockList1);
|
||||
}
|
||||
|
||||
private boolean equalsWithThreshold(float f1, float f2){
|
||||
return Math.abs(f1 - f2) < THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
private TextBlock buildTextBlock(List<TextPositionSequence> wordBlockList) {
|
||||
|
||||
@ -117,7 +201,8 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1,
|
||||
List<Ruling> rulingLines) {
|
||||
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
@ -128,7 +213,8 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter,
|
||||
boolean landscape) {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
|
||||
@ -0,0 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.resource.LegalBasisMappingResource;
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
|
||||
@FeignClient(name = "LegalBasisMappingResource", url = "${configuration-service.url}")
|
||||
public interface LegalBasisClient extends LegalBasisMappingResource {
|
||||
}
|
||||
@ -43,14 +43,14 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getProjectId(), annotateRequest.getFileId(), FileType.ORIGIN));
|
||||
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId());
|
||||
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId());
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
|
||||
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getDossierId(), annotateRequest.getFileId());
|
||||
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId());
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
dictionaryService.updateDictionary(redactionLog.getRuleSetId(), annotateRequest.getProjectId());
|
||||
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
|
||||
annotationService.annotate(pdDocument, redactionLog, sectionsGrid);
|
||||
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
@ -66,11 +66,11 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
@Override
|
||||
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
@ -91,11 +91,11 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
@Override
|
||||
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
@ -120,7 +120,7 @@ public class RedactionController implements RedactionResource {
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, true);
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
@ -143,9 +143,9 @@ public class RedactionController implements RedactionResource {
|
||||
|
||||
|
||||
@Override
|
||||
public void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId) {
|
||||
public void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String dossierTemplateId) {
|
||||
|
||||
droolsExecutionService.updateRules(ruleSetId);
|
||||
droolsExecutionService.updateRules(dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -46,6 +46,17 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
|
||||
@ -300,6 +300,18 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
|
||||
@ -21,7 +21,7 @@ public class MessagingConfiguration {
|
||||
|
||||
return QueueBuilder.durable(REDACTION_QUEUE)
|
||||
.withArgument("x-dead-letter-exchange", "")
|
||||
.withArgument("x-dead-letter-routing-key", REDACTION_QUEUE)
|
||||
.withArgument("x-dead-letter-routing-key", REDACTION_DQL)
|
||||
.maxPriority(2)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -38,7 +38,7 @@ public class RedactionMessageReceiver {
|
||||
}
|
||||
log.info("Successfully analyzed {}", analyzeRequest);
|
||||
|
||||
fileStatusProcessingUpdateClient.analysisSuccessful(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), result);
|
||||
fileStatusProcessingUpdateClient.analysisSuccessful(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result);
|
||||
}
|
||||
|
||||
@RabbitHandler
|
||||
@ -48,7 +48,7 @@ public class RedactionMessageReceiver {
|
||||
var analyzeRequest = objectMapper.readValue(in, AnalyzeRequest.class);
|
||||
log.info("Failed to process analyze request: {}", analyzeRequest);
|
||||
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
|
||||
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ import java.util.Map;
|
||||
@Data
|
||||
public class DictionaryRepresentation {
|
||||
|
||||
private String ruleSetId;
|
||||
private String dossierTemplateId;
|
||||
private long dictionaryVersion = -1;
|
||||
private List<DictionaryModel> dictionary = new ArrayList<>();
|
||||
private float[] defaultColor;
|
||||
|
||||
@ -11,6 +11,6 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class DictionaryVersion {
|
||||
|
||||
long rulesetVersion;
|
||||
long dossierTemplateVersion;
|
||||
long dossierVersion;
|
||||
}
|
||||
|
||||
@ -11,6 +11,8 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class RedRectangle2D {
|
||||
|
||||
public static final double THRESHOLD = 0.01;
|
||||
|
||||
private double x;
|
||||
private double y;
|
||||
private double width;
|
||||
@ -27,9 +29,14 @@ public class RedRectangle2D {
|
||||
}
|
||||
double x0 = getX();
|
||||
double y0 = getY();
|
||||
return x >= x0 &&
|
||||
y >= y0 &&
|
||||
(x + w) <= x0 + getWidth() &&
|
||||
(y + h) <= y0 + getHeight();
|
||||
return round(x) >= round(x0) &&
|
||||
round(y) >= round(y0) &&
|
||||
(x + w) - (x0 + getWidth()) <= THRESHOLD &&
|
||||
(y + h) - (y0 + getHeight()) <= THRESHOLD;
|
||||
}
|
||||
|
||||
private double round(double value) {
|
||||
double d = Math.pow(10, 2);
|
||||
return Math.round(value * d) / d;
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,7 +9,7 @@ import org.springframework.stereotype.Service;
|
||||
@Service
|
||||
public class AnalyzeResponseService {
|
||||
|
||||
public AnalyzeResult createAnalyzeResponse(String projectId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
|
||||
public AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
|
||||
boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint);
|
||||
|
||||
boolean hasRequests = redactionLog.getRedactionLogEntry()
|
||||
@ -31,7 +31,7 @@ public class AnalyzeResponseService {
|
||||
.isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive"));
|
||||
|
||||
return AnalyzeResult.builder()
|
||||
.projectId(projectId)
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.duration(duration)
|
||||
.numberOfPages(pageCount)
|
||||
@ -42,6 +42,7 @@ public class AnalyzeResponseService {
|
||||
.hasUpdates(hasUpdates)
|
||||
.rulesVersion(redactionLog.getRulesVersion())
|
||||
.dictionaryVersion(redactionLog.getDictionaryVersion())
|
||||
.legalBasisVersion(redactionLog.getLegalBasisVersion())
|
||||
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -1,19 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import static com.iqser.red.service.configuration.v1.api.resource.DictionaryResource.GLOBAL_DOSSIER;
|
||||
|
||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import feign.FeignException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -25,6 +18,8 @@ import java.awt.Color;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.iqser.red.service.configuration.v1.api.resource.DictionaryResource.GLOBAL_DOSSIER;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ -32,37 +27,37 @@ public class DictionaryService {
|
||||
|
||||
private final DictionaryClient dictionaryClient;
|
||||
|
||||
private final Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
|
||||
private final Map<String, DictionaryRepresentation> dictionariesByDossierTemplate = new HashMap<>();
|
||||
private final Map<String, DictionaryRepresentation> dictionariesByDossier = new HashMap<>();
|
||||
|
||||
|
||||
public DictionaryVersion updateDictionary(String ruleSetId, String dossierId) {
|
||||
public DictionaryVersion updateDictionary(String dossierTemplateId, String dossierId) {
|
||||
|
||||
long rulesetDictionaryVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
|
||||
var rulesetDictionary = dictionariesByRuleSets.get(ruleSetId);
|
||||
if (rulesetDictionary == null || rulesetDictionaryVersion > rulesetDictionary.getDictionaryVersion()) {
|
||||
updateDictionaryEntry(ruleSetId, rulesetDictionaryVersion, GLOBAL_DOSSIER);
|
||||
long dossierTemplateDictionaryVersion = dictionaryClient.getVersion(dossierTemplateId, GLOBAL_DOSSIER);
|
||||
var dossierTemplateDictionary = dictionariesByDossierTemplate.get(dossierTemplateId);
|
||||
if (dossierTemplateDictionary == null || dossierTemplateDictionaryVersion > dossierTemplateDictionary.getDictionaryVersion()) {
|
||||
updateDictionaryEntry(dossierTemplateId, dossierTemplateDictionaryVersion, GLOBAL_DOSSIER);
|
||||
}
|
||||
|
||||
long dossierDictionaryVersion = dictionaryClient.getVersion(ruleSetId, dossierId);
|
||||
long dossierDictionaryVersion = dictionaryClient.getVersion(dossierTemplateId, dossierId);
|
||||
var dossierDictionary = dictionariesByDossier.get(dossierId);
|
||||
if (dossierDictionary == null || dossierDictionaryVersion > dossierDictionary.getDictionaryVersion()) {
|
||||
updateDictionaryEntry(ruleSetId, dossierDictionaryVersion, dossierId);
|
||||
updateDictionaryEntry(dossierTemplateId, dossierDictionaryVersion, dossierId);
|
||||
}
|
||||
|
||||
return DictionaryVersion.builder().rulesetVersion(rulesetDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
|
||||
return DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
|
||||
}
|
||||
|
||||
|
||||
public DictionaryIncrement getDictionaryIncrements(String ruleSetId, DictionaryVersion fromVersion, String dossierId) {
|
||||
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion, String dossierId) {
|
||||
|
||||
DictionaryVersion version = updateDictionary(ruleSetId, dossierId);
|
||||
DictionaryVersion version = updateDictionary(dossierTemplateId, dossierId);
|
||||
|
||||
Set<DictionaryIncrementValue> newValues = new HashSet<>();
|
||||
List<DictionaryModel> dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary();
|
||||
List<DictionaryModel> dictionaryModels = dictionariesByDossierTemplate.get(dossierTemplateId).getDictionary();
|
||||
dictionaryModels.forEach(dictionaryModel -> {
|
||||
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getRulesetVersion()) {
|
||||
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
|
||||
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
|
||||
}
|
||||
});
|
||||
@ -83,35 +78,35 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
private void updateDictionaryEntry(String ruleSetId, long version, String dossierId) {
|
||||
private void updateDictionaryEntry(String dossierTemplateId, long version, String dossierId) {
|
||||
|
||||
try {
|
||||
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
|
||||
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId, dossierId);
|
||||
TypeResponse typeResponse = dictionaryClient.getAllTypes(dossierTemplateId, dossierId);
|
||||
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
|
||||
|
||||
List<DictionaryModel> dictionary = typeResponse.getTypes()
|
||||
.stream()
|
||||
.map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
|
||||
.isHint(), t.isRecommendation(), convertEntries(t, dossierId), new HashSet<>(),dossierId.equals(GLOBAL_DOSSIER) ? false : true))
|
||||
.isHint(), t.isRecommendation(), convertEntries(t, dossierId), new HashSet<>(), !dossierId.equals(GLOBAL_DOSSIER)))
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
|
||||
|
||||
Colors colors = dictionaryClient.getColors(ruleSetId);
|
||||
Colors colors = dictionaryClient.getColors(dossierTemplateId);
|
||||
|
||||
dictionaryRepresentation.setDefaultColor(convertColor(colors.getDefaultColor()));
|
||||
dictionaryRepresentation.setRequestAddColor(convertColor(colors.getRequestAdd()));
|
||||
dictionaryRepresentation.setRequestRemoveColor(convertColor(colors.getRequestRemove()));
|
||||
dictionaryRepresentation.setNotRedactedColor(convertColor(colors.getNotRedacted()));
|
||||
dictionaryRepresentation.setRuleSetId(ruleSetId);
|
||||
dictionaryRepresentation.setDossierTemplateId(dossierTemplateId);
|
||||
dictionaryRepresentation.setDictionaryVersion(version);
|
||||
dictionaryRepresentation.setDictionary(dictionary);
|
||||
|
||||
if(dossierId.equals(GLOBAL_DOSSIER)) {
|
||||
dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation);
|
||||
dictionariesByDossierTemplate.put(dossierTemplateId, dictionaryRepresentation);
|
||||
} else {
|
||||
dictionariesByDossier.put(dossierId, dictionaryRepresentation);
|
||||
}
|
||||
@ -123,14 +118,14 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public void updateExternalDictionary(Dictionary dictionary, String ruleSetId) {
|
||||
public void updateExternalDictionary(Dictionary dictionary, String dossierTemplateId) {
|
||||
|
||||
dictionary.getDictionaryModels().forEach(dm -> {
|
||||
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
|
||||
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false, GLOBAL_DOSSIER);
|
||||
long externalVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
|
||||
if (externalVersion == dictionary.getVersion().getRulesetVersion() + 1) {
|
||||
dictionary.getVersion().setRulesetVersion(externalVersion);
|
||||
dictionaryClient.addEntries(dm.getType(), dossierTemplateId, new ArrayList<>(dm.getLocalEntries()), false, GLOBAL_DOSSIER);
|
||||
long externalVersion = dictionaryClient.getVersion(dossierTemplateId, GLOBAL_DOSSIER);
|
||||
if (externalVersion == dictionary.getVersion().getDossierTemplateVersion() + 1) {
|
||||
dictionary.getVersion().setDossierTemplateVersion(externalVersion);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -139,7 +134,7 @@ public class DictionaryService {
|
||||
|
||||
private Set<DictionaryEntry> convertEntries(TypeResult t, String dossierId) {
|
||||
|
||||
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId(), dossierId)
|
||||
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getDossierTemplateId(), dossierId)
|
||||
.getEntries());
|
||||
|
||||
if (t.isCaseInsensitive()) {
|
||||
@ -156,9 +151,9 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public boolean isCaseInsensitiveDictionary(String type, String ruleSetId) {
|
||||
public boolean isCaseInsensitiveDictionary(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel dictionaryModel = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
DictionaryModel dictionaryModel = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
if (dictionaryModel != null) {
|
||||
return dictionaryModel.isCaseInsensitive();
|
||||
}
|
||||
@ -166,19 +161,19 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public float[] getColor(String type, String ruleSetId) {
|
||||
public float[] getColor(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.getColor();
|
||||
}
|
||||
return dictionariesByRuleSets.get(ruleSetId).getDefaultColor();
|
||||
return dictionariesByDossierTemplate.get(dossierTemplateId).getDefaultColor();
|
||||
}
|
||||
|
||||
|
||||
public boolean isHint(String type, String ruleSetId) {
|
||||
public boolean isHint(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.isHint();
|
||||
}
|
||||
@ -186,9 +181,9 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public boolean isRecommendation(String type, String ruleSetId) {
|
||||
public boolean isRecommendation(String type, String dossierTemplateId) {
|
||||
|
||||
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
|
||||
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
|
||||
if (model != null) {
|
||||
return model.isRecommendation();
|
||||
}
|
||||
@ -196,12 +191,12 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
public Dictionary getDeepCopyDictionary(String ruleSetId, String dossierId) {
|
||||
public Dictionary getDeepCopyDictionary(String dossierTemplateId, String dossierId) {
|
||||
|
||||
List<DictionaryModel> copy = new ArrayList<>();
|
||||
|
||||
var rulesetRepresentation = dictionariesByRuleSets.get(ruleSetId);
|
||||
rulesetRepresentation.getDictionary().forEach(dm -> {
|
||||
var dossierTemplateRepresentation = dictionariesByDossierTemplate.get(dossierTemplateId);
|
||||
dossierTemplateRepresentation.getDictionary().forEach(dm -> {
|
||||
copy.add(SerializationUtils.clone(dm));
|
||||
});
|
||||
|
||||
@ -215,25 +210,25 @@ public class DictionaryService {
|
||||
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
|
||||
}
|
||||
|
||||
return new Dictionary(copy, DictionaryVersion.builder().rulesetVersion(rulesetRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
|
||||
return new Dictionary(copy, DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
|
||||
}
|
||||
|
||||
|
||||
public float[] getRequestRemoveColor(String ruleSetId) {
|
||||
public float[] getRequestRemoveColor(String dossierTemplateId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
return dictionariesByDossierTemplate.get(dossierTemplateId).getRequestAddColor();
|
||||
}
|
||||
|
||||
|
||||
public float[] getNotRedactedColor(String ruleSetId) {
|
||||
public float[] getNotRedactedColor(String dossierTemplateId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getNotRedactedColor();
|
||||
return dictionariesByDossierTemplate.get(dossierTemplateId).getNotRedactedColor();
|
||||
}
|
||||
|
||||
|
||||
public float[] getRequestAddColor(String ruleSetId) {
|
||||
public float[] getRequestAddColor(String dossierTemplateId) {
|
||||
|
||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||
return dictionariesByDossierTemplate.get(dossierTemplateId).getRequestAddColor();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -28,14 +28,14 @@ public class DroolsExecutionService {
|
||||
|
||||
private final Map<String, KieContainer> kieContainers = new HashMap<>();
|
||||
|
||||
private final Map<String, Long> rulesVersionPerRuleSetId = new HashMap<>();
|
||||
private final Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
|
||||
|
||||
|
||||
public KieContainer getKieContainer(String ruleSetId) {
|
||||
public KieContainer getKieContainer(String dossierTemplateId) {
|
||||
|
||||
KieContainer container = kieContainers.get(ruleSetId);
|
||||
KieContainer container = kieContainers.get(dossierTemplateId);
|
||||
if (container == null) {
|
||||
return createOrUpdateKieContainer(ruleSetId);
|
||||
return createOrUpdateKieContainer(dossierTemplateId);
|
||||
} else {
|
||||
return container;
|
||||
}
|
||||
@ -55,43 +55,43 @@ public class DroolsExecutionService {
|
||||
}
|
||||
|
||||
|
||||
public KieContainer updateRules(String ruleSetId) {
|
||||
public KieContainer updateRules(String dossierTemplateId) {
|
||||
|
||||
long version = rulesClient.getVersion(ruleSetId);
|
||||
Long rulesVersion = rulesVersionPerRuleSetId.get(ruleSetId);
|
||||
long version = rulesClient.getVersion(dossierTemplateId);
|
||||
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
|
||||
if (rulesVersion == null) {
|
||||
rulesVersion = -1L;
|
||||
}
|
||||
|
||||
if (version > rulesVersion.longValue()) {
|
||||
rulesVersionPerRuleSetId.put(ruleSetId, version);
|
||||
return createOrUpdateKieContainer(ruleSetId);
|
||||
rulesVersionPerDossierTemplateId.put(dossierTemplateId, version);
|
||||
return createOrUpdateKieContainer(dossierTemplateId);
|
||||
}
|
||||
return getKieContainer(ruleSetId);
|
||||
return getKieContainer(dossierTemplateId);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private KieContainer createOrUpdateKieContainer(String ruleSetId) {
|
||||
private KieContainer createOrUpdateKieContainer(String dossierTemplateId) {
|
||||
|
||||
try {
|
||||
|
||||
RulesResponse rules = rulesClient.getRules(ruleSetId);
|
||||
RulesResponse rules = rulesClient.getRules(dossierTemplateId);
|
||||
if (rules == null || StringUtils.isEmpty(rules.getRules())) {
|
||||
throw new RuntimeException("Rules cannot be empty.");
|
||||
}
|
||||
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
KieModule kieModule = getKieModule(ruleSetId, rules.getRules(), kieServices);
|
||||
KieModule kieModule = getKieModule(dossierTemplateId, rules.getRules(), kieServices);
|
||||
|
||||
var container = kieContainers.get(ruleSetId);
|
||||
var container = kieContainers.get(dossierTemplateId);
|
||||
if (container != null) {
|
||||
container.updateToVersion(kieModule.getReleaseId());
|
||||
return container;
|
||||
}
|
||||
|
||||
container = kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
kieContainers.put(ruleSetId, container);
|
||||
kieContainers.put(dossierTemplateId, container);
|
||||
return container;
|
||||
} catch (Exception e) {
|
||||
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
|
||||
@ -100,11 +100,11 @@ public class DroolsExecutionService {
|
||||
}
|
||||
|
||||
|
||||
private KieModule getKieModule(String ruleSetId, String rules, KieServices kieServices) {
|
||||
private KieModule getKieModule(String dossierTemplateId, String rules, KieServices kieServices) {
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(rules.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules" + ruleSetId + ".drl", kieServices.getResources()
|
||||
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
@ -122,9 +122,9 @@ public class DroolsExecutionService {
|
||||
}
|
||||
|
||||
|
||||
public long getRulesVersion(String ruleSetId) {
|
||||
public long getRulesVersion(String dossierTemplateId) {
|
||||
|
||||
Long rulesVersion = rulesVersionPerRuleSetId.get(ruleSetId);
|
||||
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
|
||||
if (rulesVersion == null) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -33,13 +33,13 @@ public class EntityRedactionService {
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions, String dossierId) {
|
||||
public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, String dossierId) {
|
||||
|
||||
dictionaryService.updateDictionary(ruleSetId, dossierId);
|
||||
KieContainer container = droolsExecutionService.updateRules(ruleSetId);
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion(ruleSetId);
|
||||
dictionaryService.updateDictionary(dossierTemplateId, dossierId);
|
||||
KieContainer container = droolsExecutionService.updateRules(dossierTemplateId);
|
||||
long rulesVersion = droolsExecutionService.getRulesVersion(dossierTemplateId);
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId, dossierId);
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(dossierTemplateId, dossierId);
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null));
|
||||
|
||||
@ -75,7 +75,7 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
dictionaryService.updateExternalDictionary(dictionary, ruleSetId);
|
||||
dictionaryService.updateExternalDictionary(dictionary, dossierTemplateId);
|
||||
|
||||
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
|
||||
classifiedDoc.setRulesVersion(rulesVersion);
|
||||
|
||||
@ -5,6 +5,7 @@ import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClien
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -26,7 +27,7 @@ public class ImageClassificationService {
|
||||
|
||||
page.getImages().forEach(image -> {
|
||||
|
||||
if (settings.isEnableImageClassification()) {
|
||||
if (settings.isEnableImageClassification() && !isEntirePageImage(image, page)) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
@ -38,7 +39,6 @@ public class ImageClassificationService {
|
||||
log.error("Could not classify image", e);
|
||||
image.setImageType(ImageType.OTHER);
|
||||
}
|
||||
|
||||
log.info("Image classification took: " + (System.currentTimeMillis() - start));
|
||||
} else {
|
||||
image.setImageType(ImageType.OTHER);
|
||||
@ -59,4 +59,13 @@ public class ImageClassificationService {
|
||||
|
||||
}
|
||||
|
||||
private boolean isEntirePageImage(PdfImage image, Page page){
|
||||
double imageArea = image.getPosition().getHeight() * image.getPosition().getWidth();
|
||||
if(imageArea / page.getCropBoxArea() >= settings.getMaxImageCropboxRatio()){
|
||||
log.info("Skipping image classification because images is almost as large as the entire page");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@ import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
@ -38,7 +39,7 @@ public class ReanalyzeService {
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final RedactionChangeLogService redactionChangeLogService;
|
||||
private final AnalyzeResponseService analyzeResponseService;
|
||||
|
||||
private final LegalBasisClient legalBasisClient;
|
||||
|
||||
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||
|
||||
@ -49,7 +50,7 @@ public class ReanalyzeService {
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
|
||||
.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
|
||||
pageCount = classifiedDoc.getPages().size();
|
||||
} catch (Exception e) {
|
||||
@ -57,31 +58,34 @@ public class ReanalyzeService {
|
||||
}
|
||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||
|
||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getProjectId());
|
||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getDossierId());
|
||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
|
||||
.getRuleSetId());
|
||||
.getDossierTemplateId());
|
||||
|
||||
log.info("Redaction analysis successful...");
|
||||
|
||||
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion()
|
||||
.getRulesetVersion(), classifiedDoc.getRulesVersion(), analyzeRequest.getRuleSetId(), classifiedDoc.getDictionaryVersion()
|
||||
.getDossierVersion());
|
||||
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(),legalBasis,
|
||||
classifiedDoc.getDictionaryVersion().getDossierTemplateVersion(),
|
||||
classifiedDoc.getDictionaryVersion().getDossierVersion(),
|
||||
classifiedDoc.getRulesVersion(),
|
||||
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
|
||||
log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc
|
||||
.getDictionaryVersion(), analyzeRequest.getRuleSetId());
|
||||
log.info("Analyzed with rules {} and dictionary {} for dossierTemplate: {}", classifiedDoc.getRulesVersion(), classifiedDoc
|
||||
.getDictionaryVersion(), analyzeRequest.getDossierTemplateId());
|
||||
|
||||
// first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to loose changeLog data
|
||||
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
|
||||
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
|
||||
// store redactionLog
|
||||
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc
|
||||
.getSectionText()));
|
||||
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
|
||||
.getSectionGrid());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog);
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog);
|
||||
}
|
||||
|
||||
|
||||
@ -90,16 +94,16 @@ public class ReanalyzeService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
|
||||
var text = redactionStorageService.getText(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
|
||||
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
|
||||
// not yet ready for reanalysis
|
||||
if (redactionLog == null || text == null || text.getNumberOfPages() == 0) {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), new DictionaryVersion(redactionLog
|
||||
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getProjectId());
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
|
||||
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
|
||||
|
||||
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(analyzeRequest.getManualRedactions());
|
||||
Map<String, List<Comment>> comments = null;
|
||||
@ -159,9 +163,9 @@ public class ReanalyzeService {
|
||||
|
||||
//--
|
||||
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getRuleSetId());
|
||||
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
|
||||
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId(), analyzeRequest.getProjectId());
|
||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
@ -224,21 +228,20 @@ public class ReanalyzeService {
|
||||
for (int page = 1; page <= text.getNumberOfPages(); page++) {
|
||||
if (entitiesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
|
||||
.getRuleSetId()));
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
if (imagesPerPage.get(page) != null) {
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
|
||||
.getRuleSetId()));
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, analyzeRequest
|
||||
.getRuleSetId()));
|
||||
.getDossierTemplateId()));
|
||||
}
|
||||
|
||||
redactionLog.getRedactionLogEntry()
|
||||
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry
|
||||
.isImage());
|
||||
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
|
||||
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
|
||||
|
||||
@ -249,15 +252,15 @@ public class ReanalyzeService {
|
||||
RedactionLog redactionLog, Text text,
|
||||
DictionaryIncrement dictionaryIncrement) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getRulesetVersion());
|
||||
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierTemplateVersion());
|
||||
redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion());
|
||||
|
||||
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text
|
||||
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), duration, text
|
||||
.getNumberOfPages(), redactionLog, changeLog);
|
||||
}
|
||||
|
||||
|
||||
@ -22,12 +22,12 @@ public class RedactionChangeLogService {
|
||||
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
|
||||
public RedactionChangeLog createAndStoreChangeLog(String projectId, String fileId, RedactionLog currentRedactionLog) {
|
||||
public RedactionChangeLog createAndStoreChangeLog(String dossierId, String fileId, RedactionLog currentRedactionLog) {
|
||||
|
||||
try {
|
||||
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(projectId, fileId);
|
||||
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(dossierId, fileId);
|
||||
var changeLog = createChangeLog(currentRedactionLog, previousRedactionLog);
|
||||
redactionStorageService.storeObject(projectId, fileId, FileType.REDACTION_CHANGELOG, changeLog);
|
||||
redactionStorageService.storeObject(dossierId, fileId, FileType.REDACTION_CHANGELOG, changeLog);
|
||||
return changeLog;
|
||||
} catch (Exception e) {
|
||||
log.debug("Previous redaction log not available");
|
||||
@ -57,8 +57,11 @@ public class RedactionChangeLogService {
|
||||
.map(entry -> convert(entry, ChangeType.REMOVED))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
return new RedactionChangeLog(changeLogEntries, currentRedactionLog.getDictionaryVersion(), currentRedactionLog.getRulesVersion(), currentRedactionLog
|
||||
.getRuleSetId());
|
||||
return new RedactionChangeLog(changeLogEntries,
|
||||
currentRedactionLog.getDictionaryVersion(),
|
||||
currentRedactionLog.getDossierDictionaryVersion(),
|
||||
currentRedactionLog.getRulesVersion(),
|
||||
currentRedactionLog.getLegalBasisVersion());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
|
||||
public void createRedactionLog(Document classifiedDoc, int numberOfPages, ManualRedactions manualRedactions,
|
||||
String ruleSetId) {
|
||||
String dossierTemplateId) {
|
||||
|
||||
Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions);
|
||||
|
||||
@ -42,24 +42,24 @@ public class RedactionLogCreatorService {
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) != null) {
|
||||
classifiedDoc.getRedactionLogEntities()
|
||||
.addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
|
||||
.addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, dossierTemplateId));
|
||||
}
|
||||
|
||||
if (manualRedactionPages.contains(page)) {
|
||||
classifiedDoc.getRedactionLogEntities()
|
||||
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
|
||||
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, dossierTemplateId));
|
||||
}
|
||||
|
||||
if (classifiedDoc.getImages().get(page) != null && !classifiedDoc.getImages().get(page).isEmpty()) {
|
||||
classifiedDoc.getRedactionLogEntities()
|
||||
.addAll(addImageEntries(classifiedDoc.getImages(), manualRedactions, page, ruleSetId));
|
||||
.addAll(addImageEntries(classifiedDoc.getImages(), manualRedactions, page, dossierTemplateId));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, ManualRedactions manualRedactions,
|
||||
int pageNumber, String ruleSetId) {
|
||||
int pageNumber, String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
@ -69,14 +69,14 @@ public class RedactionLogCreatorService {
|
||||
|
||||
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
|
||||
.id(id)
|
||||
.color(getColorForImage(image, ruleSetId, false))
|
||||
.color(getColorForImage(image, dossierTemplateId, false))
|
||||
.isImage(true)
|
||||
.type(image.getType())
|
||||
.redacted(image.isRedaction())
|
||||
.reason(image.getRedactionReason())
|
||||
.legalBasis(image.getLegalBasis())
|
||||
.matchedRule(image.getMatchedRule())
|
||||
.isHint(dictionaryService.isHint(image.getType(), ruleSetId))
|
||||
.isHint(dictionaryService.isHint(image.getType(), dossierTemplateId))
|
||||
.manual(false)
|
||||
.isDictionaryEntry(false)
|
||||
.isRecommendation(false)
|
||||
@ -96,11 +96,11 @@ public class RedactionLogCreatorService {
|
||||
redactionLogEntry.setRedacted(false);
|
||||
redactionLogEntry.setStatus(Status.APPROVED);
|
||||
manualOverrideReason = image.getRedactionReason() + ", removed by manual override";
|
||||
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
|
||||
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false));
|
||||
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
|
||||
manualOverrideReason = image.getRedactionReason() + ", requested to remove";
|
||||
redactionLogEntry.setStatus(Status.REQUESTED);
|
||||
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
|
||||
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true));
|
||||
} else {
|
||||
redactionLogEntry.setStatus(Status.DECLINED);
|
||||
}
|
||||
@ -121,13 +121,13 @@ public class RedactionLogCreatorService {
|
||||
image.setRedaction(true);
|
||||
redactionLogEntry.setRedacted(true);
|
||||
redactionLogEntry.setStatus(Status.APPROVED);
|
||||
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
|
||||
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false));
|
||||
manualOverrideReason = image.getRedactionReason() + ", forced by manual override";
|
||||
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
|
||||
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
|
||||
manualOverrideReason = image.getRedactionReason() + ", requested to force redact";
|
||||
redactionLogEntry.setStatus(Status.REQUESTED);
|
||||
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
|
||||
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true));
|
||||
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
|
||||
} else {
|
||||
redactionLogEntry.setStatus(Status.DECLINED);
|
||||
@ -166,7 +166,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
|
||||
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions,
|
||||
int page, String ruleSetId) {
|
||||
int page, String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
@ -180,7 +180,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, ruleSetId);
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, dossierTemplateId);
|
||||
if (processedIds.contains(entityPositionSequence.getId())) {
|
||||
|
||||
// TODO refactor this outer loop jump as soon as we have the time.
|
||||
@ -199,11 +199,11 @@ public class RedactionLogCreatorService {
|
||||
redactionLogEntry.setRedacted(false);
|
||||
redactionLogEntry.setStatus(Status.APPROVED);
|
||||
manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
|
||||
redactionLogEntry.setColor(getColor(entity, ruleSetId, false));
|
||||
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false));
|
||||
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
|
||||
manualOverrideReason = entity.getRedactionReason() + ", requested to remove";
|
||||
redactionLogEntry.setStatus(Status.REQUESTED);
|
||||
redactionLogEntry.setColor(getColor(entity, ruleSetId, true));
|
||||
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true));
|
||||
} else {
|
||||
redactionLogEntry.setStatus(Status.DECLINED);
|
||||
}
|
||||
@ -224,13 +224,13 @@ public class RedactionLogCreatorService {
|
||||
entity.setRedaction(true);
|
||||
redactionLogEntry.setRedacted(true);
|
||||
redactionLogEntry.setStatus(Status.APPROVED);
|
||||
redactionLogEntry.setColor(getColor(entity, ruleSetId, false));
|
||||
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false));
|
||||
manualOverrideReason = entity.getRedactionReason() + ", forced by manual override";
|
||||
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
|
||||
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
|
||||
manualOverrideReason = entity.getRedactionReason() + ", requested to force redact";
|
||||
redactionLogEntry.setStatus(Status.REQUESTED);
|
||||
redactionLogEntry.setColor(getColor(entity, ruleSetId, true));
|
||||
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true));
|
||||
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
|
||||
} else {
|
||||
redactionLogEntry.setStatus(Status.DECLINED);
|
||||
@ -299,7 +299,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds,
|
||||
Map<String, List<Comment>> comments, int page,
|
||||
String ruleSetId) {
|
||||
String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
@ -311,7 +311,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
String id = manualRedactionEntry.getId();
|
||||
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id, ruleSetId);
|
||||
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id, dossierTemplateId);
|
||||
|
||||
List<Rectangle> rectanglesOnPage = new ArrayList<>();
|
||||
for (Rectangle rectangle : manualRedactionEntry.getPositions()) {
|
||||
@ -338,11 +338,11 @@ public class RedactionLogCreatorService {
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
|
||||
String ruleSetId) {
|
||||
String dossierTemplateId) {
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.id(id)
|
||||
.color(getColorForManualAdd(manualRedactionEntry.getType(), ruleSetId, manualRedactionEntry.getStatus()))
|
||||
.color(getColorForManualAdd(manualRedactionEntry.getType(), dossierTemplateId, manualRedactionEntry.getStatus()))
|
||||
.reason(manualRedactionEntry.getReason())
|
||||
.legalBasis(manualRedactionEntry.getLegalBasis())
|
||||
.value(manualRedactionEntry.getValue())
|
||||
@ -360,17 +360,17 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity, String ruleSetId) {
|
||||
private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) {
|
||||
|
||||
return RedactionLogEntry.builder()
|
||||
.color(getColor(entity, ruleSetId, false))
|
||||
.color(getColor(entity, dossierTemplateId, false))
|
||||
.reason(entity.getRedactionReason())
|
||||
.legalBasis(entity.getLegalBasis())
|
||||
.value(entity.getWord())
|
||||
.type(entity.getType())
|
||||
.redacted(entity.isRedaction())
|
||||
.isHint(isHint(entity, ruleSetId))
|
||||
.isRecommendation(isRecommendation(entity, ruleSetId))
|
||||
.isHint(isHint(entity, dossierTemplateId))
|
||||
.isRecommendation(isRecommendation(entity, dossierTemplateId))
|
||||
.section(entity.getHeadline())
|
||||
.sectionNumber(entity.getSectionNumber())
|
||||
.matchedRule(entity.getMatchedRule())
|
||||
@ -384,56 +384,56 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(Entity entity, String ruleSetId, boolean requestedToRemove) {
|
||||
private float[] getColor(Entity entity, String dossierTemplateId, boolean requestedToRemove) {
|
||||
|
||||
if (requestedToRemove) {
|
||||
return dictionaryService.getRequestRemoveColor(ruleSetId);
|
||||
return dictionaryService.getRequestRemoveColor(dossierTemplateId);
|
||||
}
|
||||
if (!entity.isRedaction() && !isHint(entity, ruleSetId)) {
|
||||
return dictionaryService.getNotRedactedColor(ruleSetId);
|
||||
if (!entity.isRedaction() && !isHint(entity, dossierTemplateId)) {
|
||||
return dictionaryService.getNotRedactedColor(dossierTemplateId);
|
||||
}
|
||||
return dictionaryService.getColor(entity.getType(), ruleSetId);
|
||||
return dictionaryService.getColor(entity.getType(), dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColorForManualAdd(String type, String ruleSetId, Status status) {
|
||||
private float[] getColorForManualAdd(String type, String dossierTemplateId, Status status) {
|
||||
|
||||
if (status.equals(Status.REQUESTED)) {
|
||||
return dictionaryService.getRequestAddColor(ruleSetId);
|
||||
return dictionaryService.getRequestAddColor(dossierTemplateId);
|
||||
} else if (status.equals(Status.DECLINED)) {
|
||||
return dictionaryService.getNotRedactedColor(ruleSetId);
|
||||
return dictionaryService.getNotRedactedColor(dossierTemplateId);
|
||||
}
|
||||
return getColor(type, ruleSetId);
|
||||
return getColor(type, dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(String type, String ruleSetId) {
|
||||
private float[] getColor(String type, String dossierTemplateId) {
|
||||
|
||||
return dictionaryService.getColor(type, ruleSetId);
|
||||
return dictionaryService.getColor(type, dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private float[] getColorForImage(Image image, String ruleSetId, boolean requestedToRemove) {
|
||||
private float[] getColorForImage(Image image, String dossierTemplateId, boolean requestedToRemove) {
|
||||
|
||||
if (requestedToRemove) {
|
||||
return dictionaryService.getRequestRemoveColor(ruleSetId);
|
||||
return dictionaryService.getRequestRemoveColor(dossierTemplateId);
|
||||
}
|
||||
if (!image.isRedaction() && !dictionaryService.isHint(image.getType(), ruleSetId)) {
|
||||
return dictionaryService.getNotRedactedColor(ruleSetId);
|
||||
if (!image.isRedaction() && !dictionaryService.isHint(image.getType(), dossierTemplateId)) {
|
||||
return dictionaryService.getNotRedactedColor(dossierTemplateId);
|
||||
}
|
||||
return dictionaryService.getColor(image.getType(), ruleSetId);
|
||||
return dictionaryService.getColor(image.getType(), dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private boolean isHint(Entity entity, String ruleSetId) {
|
||||
private boolean isHint(Entity entity, String dossierTemplateId) {
|
||||
|
||||
return dictionaryService.isHint(entity.getType(), ruleSetId);
|
||||
return dictionaryService.isHint(entity.getType(), dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
private boolean isRecommendation(Entity entity, String ruleSetId) {
|
||||
private boolean isRecommendation(Entity entity, String dossierTemplateId) {
|
||||
|
||||
return dictionaryService.isRecommendation(entity.getType(), ruleSetId);
|
||||
return dictionaryService.isRecommendation(entity.getType(), dossierTemplateId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
@ -93,10 +94,25 @@ public class PdfSegmentationService {
|
||||
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
|
||||
.getVertical());
|
||||
|
||||
PDRectangle cropbox = pdPage.getCropBox();
|
||||
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
|
||||
page.setCropBoxArea(cropboxArea);
|
||||
|
||||
page.setRotation(rotation);
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
List<PdfImage> mergedList = processImages(stripper.getImages());
|
||||
|
||||
List<PdfImage> imagesInImage = new ArrayList<>();
|
||||
for(PdfImage image: mergedList){
|
||||
for (PdfImage inner: mergedList){
|
||||
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
|
||||
imagesInImage.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
mergedList.removeAll(imagesInImage);
|
||||
|
||||
page.setImages(mergedList);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
|
||||
@ -13,4 +13,6 @@ public class RedactionServiceSettings {
|
||||
|
||||
private boolean enableImageClassification = true;
|
||||
|
||||
private float maxImageCropboxRatio = 0.9f;
|
||||
|
||||
}
|
||||
|
||||
@ -32,16 +32,16 @@ public class RedactionStorageService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void storeObject(String projectId, String fileId, FileType fileType, Object any) {
|
||||
storageService.storeObject(StorageIdUtils.getStorageId(projectId, fileId, fileType), objectMapper.writeValueAsBytes(any));
|
||||
public void storeObject(String dossierId, String fileId, FileType fileType, Object any) {
|
||||
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), objectMapper.writeValueAsBytes(any));
|
||||
}
|
||||
|
||||
|
||||
public RedactionLog getRedactionLog(String projectId, String fileId) {
|
||||
public RedactionLog getRedactionLog(String dossierId, String fileId) {
|
||||
|
||||
InputStreamResource inputStreamResource;
|
||||
try {
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.REDACTION_LOG));
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Text not available.");
|
||||
return null;
|
||||
@ -55,11 +55,11 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
public Text getText(String projectId, String fileId) {
|
||||
public Text getText(String dossierId, String fileId) {
|
||||
|
||||
InputStreamResource inputStreamResource;
|
||||
try {
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.TEXT));
|
||||
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TEXT));
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Text not available.");
|
||||
return null;
|
||||
@ -73,9 +73,9 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
public SectionGrid getSectionGrid(String projectId, String fileId) {
|
||||
public SectionGrid getSectionGrid(String dossierId, String fileId) {
|
||||
|
||||
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.SECTION_GRID));
|
||||
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
|
||||
try {
|
||||
return objectMapper.readValue(sectionGrid.getInputStream(), SectionGrid.class);
|
||||
} catch (IOException e) {
|
||||
@ -95,8 +95,8 @@ public class RedactionStorageService {
|
||||
|
||||
public static class StorageIdUtils {
|
||||
|
||||
public static String getStorageId(String projectId, String fileId, FileType fileType) {
|
||||
return projectId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
|
||||
public static String getStorageId(String dossierId, String fileId, FileType fileType) {
|
||||
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,8 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -18,6 +20,8 @@ public abstract class AbstractTextContainer {
|
||||
protected String classification;
|
||||
protected int page;
|
||||
|
||||
private Orientation orientation = Orientation.NONE;
|
||||
|
||||
public abstract String getText();
|
||||
|
||||
public boolean contains(AbstractTextContainer other) {
|
||||
|
||||
@ -246,8 +246,12 @@ public class Ruling extends Line2D.Float {
|
||||
|
||||
public Ruling expand(float amount) {
|
||||
Ruling r = (Ruling) this.clone();
|
||||
r.setStart(this.getStart() - amount);
|
||||
r.setEnd(this.getEnd() + amount);
|
||||
try {
|
||||
r.setStart(this.getStart() - amount);
|
||||
r.setEnd(this.getEnd() + amount);
|
||||
} catch (UnsupportedOperationException e){
|
||||
log.warn("Could not expand ruling!");
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@ -102,7 +102,7 @@ public class PdfVisualisationService {
|
||||
|
||||
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
|
||||
|
||||
contentStream.showText(textBlock.getClassification());
|
||||
contentStream.showText(textBlock.getClassification() + textBlock.getOrientation());
|
||||
|
||||
contentStream.endText();
|
||||
}
|
||||
|
||||
@ -9,6 +9,7 @@ import com.iqser.red.service.redaction.v1.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
@ -114,6 +115,9 @@ public class RedactionIntegrationTest {
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
private final Map<String, List<String>> dictionary = new HashMap<>();
|
||||
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
|
||||
private final Map<String, String> typeColorMap = new HashMap<>();
|
||||
@ -124,8 +128,8 @@ public class RedactionIntegrationTest {
|
||||
private final Colors colors = new Colors();
|
||||
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||
|
||||
private final static String TEST_RULESET_ID = "123";
|
||||
private final static String TEST_PROJECT_ID = "123";
|
||||
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
private final static String TEST_DOSSIER_ID = "123";
|
||||
private final static String TEST_FILE_ID = "123";
|
||||
|
||||
@Configuration
|
||||
@ -170,21 +174,21 @@ public class RedactionIntegrationTest {
|
||||
public void stubClients() {
|
||||
//Testkommentar
|
||||
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(RULES));
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(RULES));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
|
||||
.types(getTypeResponse())
|
||||
.build());
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(TypeResponse.builder()
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID)).thenReturn(TypeResponse.builder()
|
||||
.types(List.of(TypeResult.builder()
|
||||
.type(DOSSIER_REDACTIONS)
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor( "#ffe187")
|
||||
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
|
||||
@ -193,28 +197,28 @@ public class RedactionIntegrationTest {
|
||||
.build()))
|
||||
.build());
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
|
||||
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
|
||||
when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
|
||||
when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
|
||||
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
|
||||
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
|
||||
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
|
||||
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
|
||||
when(dictionaryClient.getDictionaryForType(PII, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
|
||||
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
|
||||
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
|
||||
when(dictionaryClient.getDictionaryForType(PURITY, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
|
||||
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
|
||||
when(dictionaryClient.getDictionaryForType(OCR, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
|
||||
when(dictionaryClient.getDictionaryForType(LOGO, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
|
||||
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
|
||||
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
|
||||
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_DOSSIER_TEMPLATE_ID, TEST_DOSSIER_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@ -458,7 +462,7 @@ public class RedactionIntegrationTest {
|
||||
.stream()
|
||||
.map(typeColor -> TypeResult.builder()
|
||||
.type(typeColor.getKey())
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.hexColor(typeColor.getValue())
|
||||
.isHint(hintTypeMap.get(typeColor.getKey()))
|
||||
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
|
||||
@ -522,7 +526,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
||||
@ -533,10 +537,10 @@ public class RedactionIntegrationTest {
|
||||
});
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
@ -578,7 +582,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
||||
@ -589,7 +593,7 @@ public class RedactionIntegrationTest {
|
||||
});
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
|
||||
|
||||
long rstart = System.currentTimeMillis();
|
||||
reanalyzeService.reanalyze(request);
|
||||
@ -633,8 +637,8 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||
if (entry.isImage()) {
|
||||
@ -647,7 +651,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("first analysis duration: " + (end - start));
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
|
||||
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID)));
|
||||
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID)));
|
||||
}
|
||||
|
||||
int correctFound = 0;
|
||||
@ -677,12 +681,12 @@ public class RedactionIntegrationTest {
|
||||
dictionary.get(AUTHOR).add("physical");
|
||||
reanlysisVersions.put("physical", 2L);
|
||||
|
||||
dictionary.get(VERTEBRATE).add("s-metolachlor");
|
||||
reanlysisVersions.put("s-metolachlor", 3L);
|
||||
// dictionary.get(VERTEBRATE).add("s-metolachlor");
|
||||
// reanlysisVersions.put("s-metolachlor", 3L);
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
|
||||
@ -691,7 +695,7 @@ public class RedactionIntegrationTest {
|
||||
System.out.println("reanalysis analysis duration: " + (end - start));
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
@ -712,7 +716,7 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
@ -783,7 +787,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
@ -802,15 +806,15 @@ public class RedactionIntegrationTest {
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
||||
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.projectId(request.getProjectId())
|
||||
.dossierId(request.getDossierId())
|
||||
.fileId(request.getFileId())
|
||||
.ruleSetId(request.getRuleSetId())
|
||||
.dossierTemplateId(request.getDossierTemplateId())
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.classify(redactionRequest);
|
||||
@ -830,9 +834,9 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.projectId(request.getProjectId())
|
||||
.dossierId(request.getDossierId())
|
||||
.fileId(request.getFileId())
|
||||
.ruleSetId(request.getRuleSetId())
|
||||
.dossierTemplateId(request.getDossierTemplateId())
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.sections(redactionRequest);
|
||||
@ -852,9 +856,9 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.projectId(request.getProjectId())
|
||||
.dossierId(request.getDossierId())
|
||||
.fileId(request.getFileId())
|
||||
.ruleSetId(request.getRuleSetId())
|
||||
.dossierTemplateId(request.getDossierTemplateId())
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(redactionRequest);
|
||||
@ -874,9 +878,9 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||
.projectId(request.getProjectId())
|
||||
.dossierId(request.getDossierId())
|
||||
.fileId(request.getFileId())
|
||||
.ruleSetId(request.getRuleSetId())
|
||||
.dossierTemplateId(request.getDossierTemplateId())
|
||||
.build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(redactionRequest);
|
||||
@ -896,7 +900,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||
if (!entry.isHint()) {
|
||||
@ -917,15 +921,15 @@ public class RedactionIntegrationTest {
|
||||
private AnalyzeRequest prepareStorage(InputStream stream) {
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
var bytes = IOUtils.toByteArray(stream);
|
||||
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
|
||||
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
|
||||
|
||||
return request;
|
||||
|
||||
@ -944,7 +948,7 @@ public class RedactionIntegrationTest {
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
@ -78,7 +79,10 @@ public class EntityRedactionServiceTest {
|
||||
@MockBean
|
||||
private AmazonS3 amazonS3;
|
||||
|
||||
private final static String TEST_RULESET_ID = "123";
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
|
||||
@ -134,20 +138,20 @@ public class EntityRedactionServiceTest {
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
@ -161,19 +165,19 @@ public class EntityRedactionServiceTest {
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
|
||||
}
|
||||
@ -184,21 +188,21 @@ public class EntityRedactionServiceTest {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
|
||||
" Supplement - Identity of the active substance - Reference list.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
@ -206,7 +210,7 @@ public class EntityRedactionServiceTest {
|
||||
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
|
||||
"the plant protection product.pdf");
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()
|
||||
.entrySet()
|
||||
.stream()
|
||||
@ -217,21 +221,21 @@ public class EntityRedactionServiceTest {
|
||||
public void testFalsePositiveInWrongCell() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 9)
|
||||
@ -279,26 +283,26 @@ public class EntityRedactionServiceTest {
|
||||
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_RULESET_ID);
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_DOSSIER_TEMPLATE_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 6)
|
||||
@ -318,26 +322,26 @@ public class EntityRedactionServiceTest {
|
||||
"Section(searchText.toLowerCase().contains(\"batches produced at\"))\n" + " then\n" + " section" +
|
||||
".redactIfPrecededBy(\"batches produced at\", \"sponsor\", 11, \"Redacted because it represents a " +
|
||||
"sponsor company\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + " end";
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_RULESET_ID);
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
droolsExecutionService.updateRules(TEST_DOSSIER_TEMPLATE_ID);
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse authorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse);
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream()
|
||||
.filter(entity -> entity.getMatchedRule() == 11)
|
||||
@ -355,19 +359,19 @@ public class EntityRedactionServiceTest {
|
||||
.entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
|
||||
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
|
||||
@ -378,15 +382,15 @@ public class EntityRedactionServiceTest {
|
||||
.entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
|
||||
@ -403,19 +407,19 @@ public class EntityRedactionServiceTest {
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
|
||||
.build();
|
||||
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.entries(Collections.emptyList())
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
|
||||
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
|
||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
|
||||
}
|
||||
@ -442,31 +446,31 @@ public class EntityRedactionServiceTest {
|
||||
"\"Yes\"))\n" +
|
||||
" then\n" +
|
||||
" section.redactCell(\"Author(s)\", 9, \"name\", false, \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\", \"Reg (EC) No" +
|
||||
" section.redact(\"address\", 9, \"Redacted because row is a vertebrate sgitudy\", \"Reg (EC) No" +
|
||||
" 1107/2009 Art. 63 (2g)\");\n" +
|
||||
" section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n" +
|
||||
" end";
|
||||
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(RULES_VERSION.incrementAndGet());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(new RulesResponse(tableRules));
|
||||
TypeResponse typeResponse = TypeResponse.builder()
|
||||
.types(Arrays.asList(
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(AUTHOR_CODE).hexColor("#ffff00").build(),
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
|
||||
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(AUTHOR_CODE).hexColor("#ffff00").build(),
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
|
||||
TypeResult.builder().dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
|
||||
.build();
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse);
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
|
||||
when(dictionaryClient.getAllTypes(TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse);
|
||||
|
||||
// Default empty return to prevent NPEs
|
||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
|
||||
DictionaryResponse addressResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
|
||||
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
|
||||
.build();
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
|
||||
|
||||
Colors colors = new Colors();
|
||||
colors.setDefaultColor("#acfc00");
|
||||
@ -474,7 +478,7 @@ public class EntityRedactionServiceTest {
|
||||
colors.setRequestAdd("#04b093");
|
||||
colors.setRequestRemove("#04b093");
|
||||
|
||||
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
@ -62,6 +63,9 @@ public class PdfSegmentationServiceTest {
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
|
||||
@ -1652,3 +1652,5 @@ Zoecon Corp.
|
||||
Zoecon Corp., Palo Alto, USA
|
||||
Zyma SA
|
||||
Zyma SA, Nyon, Switzerland
|
||||
Mambo-Tox Ltd. Biomedical Sciences Building Bassett Crescent East Southampton SO16 7PX UK
|
||||
Syngenta Environmental Sciences Jealott’s Hill International Research Centre Bracknell, Berkshire RG42 6EY UK
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user