diff --git a/redaction-service-v1/pom.xml b/redaction-service-v1/pom.xml index 0c8ed5ac..124e5ae4 100644 --- a/redaction-service-v1/pom.xml +++ b/redaction-service-v1/pom.xml @@ -5,7 +5,7 @@ platform-dependency com.iqser.red - 1.0.8 + 1.1.2 4.0.0 @@ -32,7 +32,7 @@ com.iqser.red platform-commons-dependency - 1.2.5 + 1.2.9 import pom @@ -52,4 +52,4 @@ - \ No newline at end of file + diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java index 469947e6..d8bcf4d0 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeRequest.java @@ -5,13 +5,19 @@ import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; +import java.time.OffsetDateTime; + @Data @Builder @NoArgsConstructor @AllArgsConstructor public class AnalyzeRequest { - private byte[] document; + private String projectId; + private String fileId; private String ruleSetId; private ManualRedactions manualRedactions; + private OffsetDateTime lastProcessed; + } + diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java index 56bf4b6a..ea9c8d4f 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnalyzeResult.java @@ -12,8 +12,11 @@ import lombok.NoArgsConstructor; public class AnalyzeResult { private int numberOfPages; - private RedactionLog redactionLog; - private SectionGrid sectionGrid; - private Text text; + private boolean hasHints; + private boolean hasRequests; + private boolean hasRedactions; + private boolean hasImages; + private boolean hasUpdates; } + diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnnotateRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnnotateRequest.java index ab9926e9..4f65d74e 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnnotateRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/AnnotateRequest.java @@ -11,7 +11,6 @@ import lombok.NoArgsConstructor; @AllArgsConstructor public class AnnotateRequest { - private byte[] document; - private RedactionLog redactionLog; - private SectionGrid sectionGrid; + private String projectId; + private String fileId; } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Comment.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Comment.java index f90d04d3..c45ae271 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Comment.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Comment.java @@ -1,12 +1,12 @@ package com.iqser.red.service.redaction.v1.model; -import java.time.OffsetDateTime; - import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; +import java.time.OffsetDateTime; + @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionEntry.java index e3303495..bcec73a5 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactionEntry.java @@ -1,13 +1,13 @@ package com.iqser.red.service.redaction.v1.model; -import java.util.ArrayList; -import java.util.List; - import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java index ee6e0ad1..af866d09 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/ManualRedactions.java @@ -1,16 +1,16 @@ package com.iqser.red.service.redaction.v1.model; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java index 2d5f5acb..6b608be3 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionLogEntry.java @@ -1,13 +1,13 @@ package com.iqser.red.service.redaction.v1.model; -import java.util.ArrayList; -import java.util.List; - import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @Data @Builder @NoArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java index ab419184..fd525887 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionRequest.java @@ -11,7 +11,8 @@ import lombok.NoArgsConstructor; @AllArgsConstructor public class RedactionRequest { - private byte[] document; + private String projectId; + private String fileId; private String ruleSetId; private ManualRedactions manualRedactions; } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java index 398f9fa5..80650eab 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RedactionResult.java @@ -13,7 +13,5 @@ public class RedactionResult { private byte[] document; private int numberOfPages; - private RedactionLog redactionLog; - private SectionGrid sectionGrid; } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java deleted file mode 100644 index e11fee5d..00000000 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/RenalyzeRequest.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.iqser.red.service.redaction.v1.model; - -import java.time.OffsetDateTime; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class RenalyzeRequest { - - private byte[] document; - private String ruleSetId; - private ManualRedactions manualRedactions; - private Text text; - private RedactionLog redactionLog; - private OffsetDateTime lastProcessed; -} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java index 3e02dce8..07e67c9f 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionArea.java @@ -27,7 +27,7 @@ public class SectionArea { private String header; public boolean contains(Rectangle other) { - return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight(); + return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight(); } } diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java index ea5acb95..362b5c5c 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionGrid.java @@ -1,13 +1,13 @@ package com.iqser.red.service.redaction.v1.model; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + @Data @AllArgsConstructor @NoArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java index b96e3572..38031b36 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionRectangle.java @@ -1,13 +1,13 @@ package com.iqser.red.service.redaction.v1.model; -import java.util.List; - import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import lombok.NonNull; import lombok.RequiredArgsConstructor; +import java.util.List; + @Data @AllArgsConstructor @NoArgsConstructor diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionText.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionText.java deleted file mode 100644 index 3c8fa02a..00000000 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/SectionText.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.iqser.red.service.redaction.v1.model; - -import java.util.ArrayList; -import java.util.List; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class SectionText { - - private int sectionNumber; - private String text; - - private boolean isTable; - private String headline; - - private List sectionAreas = new ArrayList<>(); - -} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java index b58dcd9c..7b6c1952 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/resources/RedactionResource.java @@ -1,14 +1,6 @@ package com.iqser.red.service.redaction.v1.resources; -import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; -import com.iqser.red.service.redaction.v1.model.AnalyzeResult; -import com.iqser.red.service.redaction.v1.model.AnnotateRequest; -import com.iqser.red.service.redaction.v1.model.AnnotateResponse; -import com.iqser.red.service.redaction.v1.model.ReanalyzeResult; -import com.iqser.red.service.redaction.v1.model.RedactionRequest; -import com.iqser.red.service.redaction.v1.model.RedactionResult; -import com.iqser.red.service.redaction.v1.model.RenalyzeRequest; - +import com.iqser.red.service.redaction.v1.model.*; import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; @@ -25,7 +17,7 @@ public interface RedactionResource { AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest); @PostMapping(value = "/reanalyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) - ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest); + AnalyzeResult reanalyze(@RequestBody AnalyzeRequest renalyzeRequest); @PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest); @@ -39,10 +31,10 @@ public interface RedactionResource { @PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE) RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest); - @PostMapping(value = "/rules/update"+RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE) + @PostMapping(value = "/rules/update" + RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE) void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId); @PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE) void testRules(@RequestBody String rules); -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/pom.xml b/redaction-service-v1/redaction-service-server-v1/pom.xml index b76c67d7..8f8d8db8 100644 --- a/redaction-service-v1/redaction-service-server-v1/pom.xml +++ b/redaction-service-v1/redaction-service-server-v1/pom.xml @@ -12,6 +12,10 @@ redaction-service-server-v1 + + com.iqser.red.commons + storage-commons + com.iqser.red.service redaction-service-api-v1 @@ -20,7 +24,12 @@ com.iqser.red.service configuration-service-api-v1 - 2.2.9 + 2.5.0 + + + com.iqser.red.service + file-management-service-api-v1 + 2.6.7 org.drools diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java index f591b03f..d3933b0c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/Application.java @@ -1,5 +1,8 @@ package com.iqser.red.service.redaction.v1.server; +import com.iqser.red.commons.spring.DefaultWebMvcConfiguration; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; import org.springframework.boot.SpringApplication; import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -8,10 +11,6 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties import org.springframework.cloud.openfeign.EnableFeignClients; import org.springframework.context.annotation.Import; -import com.iqser.red.commons.spring.DefaultWebMvcConfiguration; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; - @Import({DefaultWebMvcConfiguration.class}) @EnableFeignClients(basePackageClasses = RulesClient.class) @EnableConfigurationProperties(RedactionServiceSettings.class) @@ -23,4 +22,4 @@ public class Application { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java index 0fb41529..c78ebccd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Document.java @@ -1,20 +1,18 @@ package com.iqser.red.service.redaction.v1.server.classification.model; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.model.SectionGrid; +import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; +import com.iqser.red.service.redaction.v1.server.redaction.model.Image; +import lombok.Data; +import lombok.NoArgsConstructor; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.SectionGrid; -import com.iqser.red.service.redaction.v1.model.SectionText; -import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; -import com.iqser.red.service.redaction.v1.server.redaction.model.Image; - -import lombok.Data; -import lombok.NoArgsConstructor; - @Data @NoArgsConstructor public class Document { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/FloatFrequencyCounter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/FloatFrequencyCounter.java index 6828bd38..c232339b 100755 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/FloatFrequencyCounter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/FloatFrequencyCounter.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model; +import lombok.Getter; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -7,38 +9,35 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import lombok.Getter; - -public class FloatFrequencyCounter -{ +public class FloatFrequencyCounter { @Getter Map countPerValue = new HashMap<>(); - public void add(float value){ - if(!countPerValue.containsKey(value)){ + public void add(float value) { + if (!countPerValue.containsKey(value)) { countPerValue.put(value, 1); } else { countPerValue.put(value, countPerValue.get(value) + 1); } } - public void addAll(Map otherCounter){ - for(Map.Entry entry: otherCounter.entrySet()){ - if(countPerValue.containsKey(entry.getKey())){ - countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue()); + public void addAll(Map otherCounter) { + for (Map.Entry entry : otherCounter.entrySet()) { + if (countPerValue.containsKey(entry.getKey())) { + countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue()); } else { countPerValue.put(entry.getKey(), entry.getValue()); } } } - public Float getMostPopular(){ + public Float getMostPopular() { Map.Entry mostPopular = null; - for(Map.Entry entry: countPerValue.entrySet()){ - if(mostPopular == null){ + for (Map.Entry entry : countPerValue.entrySet()) { + if (mostPopular == null) { mostPopular = entry; - } else if(entry.getValue() >= mostPopular.getValue()){ + } else if (entry.getValue() >= mostPopular.getValue()) { mostPopular = entry; } } @@ -46,12 +45,11 @@ public class FloatFrequencyCounter } - - public List getHighterThanMostPopular(){ + public List getHighterThanMostPopular() { Float mostPopular = getMostPopular(); List higher = new ArrayList<>(); - for(Float value: countPerValue.keySet()){ - if(value > mostPopular){ + for (Float value : countPerValue.keySet()) { + if (value > mostPopular) { higher.add(value); } } @@ -60,12 +58,12 @@ public class FloatFrequencyCounter } - public Float getHighest(){ + public Float getHighest() { Float highest = null; - for(Float value: countPerValue.keySet()){ - if (highest == null){ + for (Float value : countPerValue.keySet()) { + if (highest == null) { highest = value; - } else if(value > highest){ + } else if (value > highest) { highest = value; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Footer.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Footer.java index 37b691e6..61d12a43 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Footer.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Footer.java @@ -1,12 +1,11 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; - import lombok.AllArgsConstructor; import lombok.Data; +import java.util.List; + @Data @AllArgsConstructor public class Footer { @@ -21,4 +20,4 @@ public class Footer { return searchableText; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Header.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Header.java index 8a4b67ae..f3067452 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Header.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Header.java @@ -1,12 +1,11 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; - import lombok.AllArgsConstructor; import lombok.Data; +import java.util.List; + @Data @AllArgsConstructor public class Header { @@ -21,4 +20,4 @@ public class Header { return searchableText; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java index af07f19f..873ae8a1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java @@ -1,15 +1,14 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; - import lombok.Data; import lombok.NonNull; import lombok.RequiredArgsConstructor; +import java.util.List; + @Data @RequiredArgsConstructor public class Page { @@ -37,4 +36,4 @@ public class Page { return rotation != 0; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java index 07e6b6fa..5a661126 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Paragraph.java @@ -1,19 +1,18 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.ArrayList; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; - import lombok.Data; import lombok.NoArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @Data @NoArgsConstructor -public class Paragraph implements Comparable{ +public class Paragraph implements Comparable { private List pageBlocks = new ArrayList<>(); private List images = new ArrayList<>(); @@ -62,4 +61,4 @@ public class Paragraph implements Comparable{ return 0; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/SectionText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/SectionText.java new file mode 100644 index 00000000..c9c88cec --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/SectionText.java @@ -0,0 +1,45 @@ +package com.iqser.red.service.redaction.v1.server.classification.model; + +import com.iqser.red.service.redaction.v1.model.SectionArea; +import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; +import com.iqser.red.service.redaction.v1.server.redaction.model.Image; +import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.*; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class SectionText { + + private int sectionNumber; + private String text; + + private boolean isTable; + private String headline; + + private List sectionAreas = new ArrayList<>(); + private Set images = new HashSet<>(); + + private List textBlocks = new ArrayList<>(); + private Map tabularData = new HashMap<>(); + private List cellStarts = new ArrayList<>(); + + + public SearchableText getSearchableText() { + + SearchableText searchableText = new SearchableText(); + textBlocks.forEach(block -> { + if (block != null) { + searchableText.addAll(block.getSequences()); + } + }); + return searchableText; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java index 0cbdfcc0..8aeb451d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/StringFrequencyCounter.java @@ -1,10 +1,10 @@ package com.iqser.red.service.redaction.v1.server.classification.model; +import lombok.Getter; + import java.util.HashMap; import java.util.Map; -import lombok.Getter; - public class StringFrequencyCounter { @Getter @@ -46,4 +46,4 @@ public class StringFrequencyCounter { return mostPopular != null ? mostPopular.getKey() : null; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Text.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Text.java similarity index 72% rename from redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Text.java rename to redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Text.java index a2bc00e9..4df1ace3 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/Text.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Text.java @@ -1,17 +1,18 @@ -package com.iqser.red.service.redaction.v1.model; - -import java.util.ArrayList; -import java.util.List; +package com.iqser.red.service.redaction.v1.server.classification.model; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @Data @NoArgsConstructor @AllArgsConstructor public class Text { + private int numberOfPages; private List sectionTexts = new ArrayList<>(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java index 0b3b253c..6da9f6a0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/TextBlock.java @@ -1,16 +1,15 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.ArrayList; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; - +import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; +import java.util.ArrayList; +import java.util.List; + @AllArgsConstructor @Builder @Data @@ -98,7 +97,6 @@ public class TextBlock extends AbstractTextContainer { } - @Override public String toString() { @@ -139,4 +137,4 @@ public class TextBlock extends AbstractTextContainer { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/UnclassifiedText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/UnclassifiedText.java index bfe56052..79277b9e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/UnclassifiedText.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/UnclassifiedText.java @@ -1,12 +1,11 @@ package com.iqser.red.service.redaction.v1.server.classification.model; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; - import lombok.AllArgsConstructor; import lombok.Data; +import java.util.List; + @Data @AllArgsConstructor public class UnclassifiedText { @@ -21,4 +20,4 @@ public class UnclassifiedText { return searchableText; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java index d4b83409..4badfec4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java @@ -1,21 +1,20 @@ package com.iqser.red.service.redaction.v1.server.classification.service; -import java.util.ArrayList; -import java.util.List; - -import org.springframework.stereotype.Service; - -import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils; import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; +import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; @Service @SuppressWarnings("all") diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java index 362ba551..1e72fd52 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/ClassificationService.java @@ -1,19 +1,17 @@ package com.iqser.red.service.redaction.v1.server.classification.service; -import java.util.List; -import java.util.regex.Pattern; - -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.util.List; +import java.util.regex.Pattern; @Slf4j @Service diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/utils/PositionUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/utils/PositionUtils.java index ce421eb6..98117bbe 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/utils/PositionUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/utils/PositionUtils.java @@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.classification.utils; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; - import lombok.experimental.UtilityClass; @UtilityClass diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java index 5dc671e4..0951ffcd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/client/MockMultipartFile.java @@ -1,16 +1,16 @@ package com.iqser.red.service.redaction.v1.server.client; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; - import org.springframework.lang.NonNull; import org.springframework.lang.Nullable; import org.springframework.util.Assert; import org.springframework.util.FileCopyUtils; import org.springframework.web.multipart.MultipartFile; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + public class MockMultipartFile implements MultipartFile { private final String name; @@ -22,13 +22,13 @@ public class MockMultipartFile implements MultipartFile { public MockMultipartFile(String name, @Nullable byte[] content) { - this(name, "", (String) null, (byte[]) content); + this(name, "", null, content); } public MockMultipartFile(String name, InputStream contentStream) throws IOException { - this(name, "", (String) null, (byte[]) FileCopyUtils.copyToByteArray(contentStream)); + this(name, "", null, FileCopyUtils.copyToByteArray(contentStream)); } @@ -78,7 +78,7 @@ public class MockMultipartFile implements MultipartFile { public long getSize() { - return (long) this.content.length; + return this.content.length; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/ControllerAdvice.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/ControllerAdvice.java index fe76646e..3262faf2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/ControllerAdvice.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/ControllerAdvice.java @@ -1,17 +1,15 @@ package com.iqser.red.service.redaction.v1.server.controller; -import java.time.OffsetDateTime; - import com.iqser.red.commons.spring.ErrorMessage; +import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; +import lombok.extern.slf4j.Slf4j; import org.springframework.http.HttpStatus; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.ResponseStatus; import org.springframework.web.bind.annotation.RestControllerAdvice; -import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; - -import lombok.extern.slf4j.Slf4j; +import java.time.OffsetDateTime; @Slf4j @RestControllerAdvice @@ -38,4 +36,4 @@ public class ControllerAdvice { return new ErrorMessage(OffsetDateTime.now(), e.getMessage()); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java index 97a152f4..ed55ff17 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/controller/RedactionController.java @@ -1,17 +1,7 @@ package com.iqser.red.service.redaction.v1.server.controller; -import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; -import com.iqser.red.service.redaction.v1.model.AnalyzeResult; -import com.iqser.red.service.redaction.v1.model.AnnotateRequest; -import com.iqser.red.service.redaction.v1.model.AnnotateResponse; -import com.iqser.red.service.redaction.v1.model.ReanalyzeResult; -import com.iqser.red.service.redaction.v1.model.RedactionLog; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.RedactionRequest; -import com.iqser.red.service.redaction.v1.model.RedactionResult; -import com.iqser.red.service.redaction.v1.model.RenalyzeRequest; -import com.iqser.red.service.redaction.v1.model.SectionGrid; -import com.iqser.red.service.redaction.v1.model.Text; +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.redaction.v1.model.*; import com.iqser.red.service.redaction.v1.resources.RedactionResource; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; @@ -19,27 +9,21 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException; import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService; import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService; -import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService; -import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService; import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService; -import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; - import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RestController; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.List; @Slf4j @RestController @@ -47,61 +31,36 @@ import java.util.List; public class RedactionController implements RedactionResource { private final PdfVisualisationService pdfVisualisationService; - private final PdfSegmentationService pdfSegmentationService; - private final RedactionLogCreatorService redactionLogCreatorService; - private final EntityRedactionService entityRedactionService; private final DroolsExecutionService droolsExecutionService; private final DictionaryService dictionaryService; private final AnnotationService annotationService; private final ReanalyzeService reanalyzeService; - private final ImageClassificationService imageClassificationService; - + private final PdfSegmentationService pdfSegmentationService; + private final RedactionStorageService redactionStorageService; @Override public AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest) { - - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(analyzeRequest.getDocument()))) { - pdDocument.setAllSecurityToBeRemoved(true); - - Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); - - log.info("Document structure analysis successful, starting redaction analysis..."); - - imageClassificationService.classifyImages(classifiedDoc); - entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions()); - redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest - .getRuleSetId()); - - log.info("Redaction analysis successful..."); - - return AnalyzeResult.builder() - .sectionGrid(classifiedDoc.getSectionGrid()) - .redactionLog(new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc - .getRulesVersion(), analyzeRequest.getRuleSetId())) - .numberOfPages(classifiedDoc.getPages().size()) - .text(new Text(classifiedDoc.getSectionText())) - .build(); - - } catch (Exception e) { - throw new RedactionException(e); - } + return reanalyzeService.analyze(analyzeRequest); } - - public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) { - - return reanalyzeService.reanalyze(renalyzeRequest); + @Override + public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) { + return reanalyzeService.reanalyze(analyzeRequest); } public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) { - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(annotateRequest.getDocument()))) { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getProjectId(), annotateRequest.getFileId(), FileType.ORIGIN)); + var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId()); + var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId()); + + try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); - dictionaryService.updateDictionary(annotateRequest.getRedactionLog().getRuleSetId()); - annotationService.annotate(pdDocument, annotateRequest.getRedactionLog(), annotateRequest.getSectionGrid()); + dictionaryService.updateDictionary(redactionLog.getRuleSetId()); + annotationService.annotate(pdDocument, redactionLog, sectionsGrid); try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { pdDocument.save(byteArrayOutputStream); @@ -115,15 +74,16 @@ public class RedactionController implements RedactionResource { @Override - public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) { + public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN)); - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfSegmentationRequest.getDocument()))) { + try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument); - return convert(pdDocument, classifiedDoc.getPages().size(), pdfSegmentationRequest.getRuleSetId()); + return convert(pdDocument, classifiedDoc.getPages().size()); } catch (IOException e) { throw new RedactionException(e); @@ -134,14 +94,15 @@ public class RedactionController implements RedactionResource { @Override public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN)); - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) { + try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); pdfVisualisationService.visualizeParagraphs(classifiedDoc, pdDocument); - return convert(pdDocument, classifiedDoc.getPages().size(), redactionRequest.getRuleSetId()); + return convert(pdDocument, classifiedDoc.getPages().size()); } catch (IOException e) { throw new RedactionException(e); @@ -153,27 +114,29 @@ public class RedactionController implements RedactionResource { @Override public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) { - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN)); + + Document classifiedDoc; + try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { pdDocument.setAllSecurityToBeRemoved(true); - - Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); - - StringBuilder sb = new StringBuilder(); - for (Page page : classifiedDoc.getPages()) { - for (AbstractTextContainer textContainer : page.getTextBlocks()) { - if (textContainer instanceof Table) { - Table table = (Table) textContainer; - sb.append(table.getTextAsHtml()).append("
").append("
"); - } - } - } - - return RedactionResult.builder().document(sb.toString().getBytes()).build(); - - } catch (IOException e) { + classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); + } catch (Exception e) { throw new RedactionException(e); } + + StringBuilder sb = new StringBuilder(); + for (Page page : classifiedDoc.getPages()) { + for (AbstractTextContainer textContainer : page.getTextBlocks()) { + if (textContainer instanceof Table) { + Table table = (Table) textContainer; + sb.append(table.getTextAsHtml()).append("
").append("
"); + } + } + } + + return RedactionResult.builder().document(sb.toString().getBytes()).build(); + } @@ -191,23 +154,13 @@ public class RedactionController implements RedactionResource { } - private RedactionResult convert(PDDocument document, int numberOfPages, String ruleSetId) throws IOException { - - return convert(document, numberOfPages, null, null, 0, 0, ruleSetId); - } - - - private RedactionResult convert(PDDocument document, int numberOfPages, - List redactionLogEntities, SectionGrid sectionGrid, - long dictionaryVersion, long rulesVersion, String ruleSetId) throws IOException { + private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException { try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { document.save(byteArrayOutputStream); return RedactionResult.builder() .document(byteArrayOutputStream.toByteArray()) .numberOfPages(numberOfPages) - .redactionLog(new RedactionLog(redactionLogEntities, dictionaryVersion, rulesVersion, ruleSetId)) - .sectionGrid(sectionGrid) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java index 7e2e56c8..9b52bf7b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFAreaTextStripper.java @@ -1,17 +1,15 @@ package com.iqser.red.service.redaction.v1.server.parsing; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import lombok.Getter; +import lombok.Setter; +import org.apache.pdfbox.text.PDFTextStripperByArea; +import org.apache.pdfbox.text.TextPosition; + import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.pdfbox.text.PDFTextStripperByArea; -import org.apache.pdfbox.text.TextPosition; - -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; - -import lombok.Getter; -import lombok.Setter; - public class PDFAreaTextStripper extends PDFTextStripperByArea { @Getter @@ -76,7 +74,7 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea { } - public void clearPositions(){ + public void clearPositions() { textPositionSequences = new ArrayList<>(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index 4b680c32..26933528 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -1,33 +1,15 @@ package com.iqser.red.service.redaction.v1.server.parsing; -import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; -import java.awt.geom.Rectangle2D; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.contentstream.operator.OperatorName; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; -import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; -import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; -import org.apache.pdfbox.contentstream.operator.state.SetFlatness; -import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle; -import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern; -import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle; -import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit; -import org.apache.pdfbox.contentstream.operator.state.SetLineWidth; -import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent; +import org.apache.pdfbox.contentstream.operator.color.*; +import org.apache.pdfbox.contentstream.operator.state.*; import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; @@ -40,40 +22,31 @@ import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.util.Matrix; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; - -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; @Slf4j public class PDFLinesTextStripper extends PDFTextStripper { - @Setter - protected PDPage pdpage; - - @Getter - private int minCharWidth; - - @Getter - private int maxCharWidth; - - @Getter - private int minCharHeight; - - @Getter - private int maxCharHeight; - @Getter private final List textPositionSequences = new ArrayList<>(); - @Getter private final List rulings = new ArrayList<>(); - private final List graphicsPath = new ArrayList<>(); - + @Setter + protected PDPage pdpage; + @Getter + private int minCharWidth; + @Getter + private int maxCharWidth; + @Getter + private int minCharHeight; + @Getter + private int maxCharHeight; @Getter private List images = new ArrayList<>(); @@ -369,4 +342,4 @@ public class PDFLinesTextStripper extends PDFTextStripper { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java index dbcc1e18..c6181f4e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java @@ -1,23 +1,20 @@ package com.iqser.red.service.redaction.v1.server.parsing.model; -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.text.TextPosition; - import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; - import lombok.Data; import lombok.RequiredArgsConstructor; +import org.apache.pdfbox.text.TextPosition; + +import java.util.ArrayList; +import java.util.List; @Data @RequiredArgsConstructor public class TextPositionSequence implements CharSequence { - private List textPositions = new ArrayList<>(); - private final int page; + private List textPositions = new ArrayList<>(); public TextPositionSequence(List textPositions, int page) { @@ -223,4 +220,4 @@ public class TextPositionSequence implements CharSequence { return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java index c566bbb5..6d65518c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java @@ -1,14 +1,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import java.util.Iterator; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; - import lombok.Value; +import java.util.Iterator; +import java.util.List; + @Value public class CellValue { @@ -47,4 +46,4 @@ public class CellValue { .replaceAll(" {2}", " "); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java index c1fd7719..c8d514d1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Dictionary.java @@ -1,13 +1,13 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import lombok.Data; +import lombok.Getter; + import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import lombok.Data; -import lombok.Getter; - @Data public class Dictionary { @@ -21,15 +21,15 @@ public class Dictionary { private long version; - public Dictionary(List dictionaryModels, long dictionaryVersion){ + public Dictionary(List dictionaryModels, long dictionaryVersion) { this.dictionaryModels = dictionaryModels; this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm)); this.version = dictionaryVersion; } - public int getDictionaryRank(String type){ - if(!localAccessMap.containsKey(type)){ + public int getDictionaryRank(String type) { + if (!localAccessMap.containsKey(type)) { return 0; } return localAccessMap.get(type).getRank(); @@ -60,7 +60,7 @@ public class Dictionary { public boolean containsValue(String type, String value) { - if (localAccessMap.containsKey(type) && localAccessMap.get(type) + return localAccessMap.containsKey(type) && localAccessMap.get(type) .getEntries() .contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type) .getLocalEntries() @@ -68,10 +68,7 @@ public class Dictionary { .getEntries() .contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type) .getLocalEntries() - .contains(value)) { - return true; - } - return false; + .contains(value); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java index 2366527e..c17862cb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryIncrement.java @@ -1,10 +1,10 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import java.util.Set; - import lombok.AllArgsConstructor; import lombok.Data; +import java.util.Set; + @Data @AllArgsConstructor public class DictionaryIncrement { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java index c4b0ce7c..3fd3f3d4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryModel.java @@ -1,15 +1,14 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; +import lombok.AllArgsConstructor; +import lombok.Data; + import java.io.Serializable; import java.util.Set; import java.util.stream.Collectors; -import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; - -import lombok.AllArgsConstructor; -import lombok.Data; - @Data @AllArgsConstructor public class DictionaryModel implements Serializable { @@ -23,8 +22,8 @@ public class DictionaryModel implements Serializable { private Set entries; private Set localEntries; - public Set getValues(boolean local){ - return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e-> e.getValue()).collect(Collectors + public Set getValues(boolean local) { + return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors .toSet()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryRepresentation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryRepresentation.java index 615e4dda..0f7b6820 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryRepresentation.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/DictionaryRepresentation.java @@ -20,5 +20,4 @@ public class DictionaryRepresentation { private Map localAccessMap = new HashMap<>(); - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java index 8c8c8952..373a772b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Entity.java @@ -1,13 +1,12 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import lombok.Data; +import lombok.EqualsAndHashCode; + import java.util.ArrayList; import java.util.List; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; - -import lombok.Data; -import lombok.EqualsAndHashCode; - @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true) public class Entity { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityPositionSequence.java index 9bd0fb38..6784707d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/EntityPositionSequence.java @@ -1,24 +1,23 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import java.util.ArrayList; -import java.util.List; - import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; - import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.RequiredArgsConstructor; +import java.util.ArrayList; +import java.util.List; + @Data @RequiredArgsConstructor @AllArgsConstructor @EqualsAndHashCode public class EntityPositionSequence { + private final String id; @EqualsAndHashCode.Exclude private List sequences = new ArrayList<>(); private int pageNumber; - private final String id; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java index 377fd55b..e4e6167a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Image.java @@ -1,12 +1,12 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import java.awt.geom.Rectangle2D; - import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; +import java.awt.geom.Rectangle2D; + @Data @Builder @NoArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java index 86cabcfa..4e60f6e4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/PdfImage.java @@ -1,14 +1,14 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; - import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import lombok.NonNull; import lombok.RequiredArgsConstructor; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; + @Data @NoArgsConstructor @AllArgsConstructor @@ -25,4 +25,4 @@ public class PdfImage { @NonNull private int page; -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java deleted file mode 100644 index be141819..00000000 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/ReanalysisSection.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.iqser.red.service.redaction.v1.server.redaction.model; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; - -import lombok.Data; -import lombok.NoArgsConstructor; - -@Data -@NoArgsConstructor -public class ReanalysisSection { - - private int sectionNumber; - private String headline; - private List textBlocks; - private Map tabularData = new HashMap<>(); - private List cellStarts; - private Set images = new HashSet<>(); - - - public SearchableText getSearchableText() { - - SearchableText searchableText = new SearchableText(); - textBlocks.forEach(block -> { - if (block instanceof TextBlock) { - searchableText.addAll(block.getSequences()); - } - }); - return searchableText; - } - -} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java index dfa500ea..b7277c9a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java @@ -1,14 +1,14 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; + import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; - public class SearchableText { private final List sequences = new ArrayList<>(); @@ -232,4 +232,4 @@ public class SearchableText { return sb.append("\n").toString(); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index 8dc46fee..0ab77f48 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -1,6 +1,12 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; -import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX; +import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; +import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; +import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; +import lombok.Builder; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import java.util.Collection; import java.util.HashMap; @@ -11,15 +17,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; - -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; -import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns; - -import lombok.Builder; -import lombok.Data; -import lombok.extern.slf4j.Slf4j; +import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX; @Data @Slf4j diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java new file mode 100644 index 00000000..976ccb96 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnalyzeResponseService.java @@ -0,0 +1,41 @@ +package com.iqser.red.service.redaction.v1.server.redaction.service; + +import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLog; +import com.iqser.red.service.redaction.v1.model.AnalyzeResult; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import org.springframework.stereotype.Service; + +@Service +public class AnalyzeResponseService { + + public AnalyzeResult createAnalyzeResponse(int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) { + boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint); + + boolean hasRequests = redactionLog.getRedactionLogEntry() + .stream() + .anyMatch(entry -> entry.isManual() && entry.getStatus() + .equals(com.iqser.red.service.redaction.v1.model.Status.REQUESTED)); + + boolean hasRedactions = redactionLog.getRedactionLogEntry() + .stream() + .anyMatch(entry -> entry.isRedacted() && !entry.isManual() || entry.isManual() && entry.getStatus() + .equals(com.iqser.red.service.redaction.v1.model.Status.APPROVED)); + + boolean hasImages = redactionLog.getRedactionLogEntry() + .stream() + .anyMatch(entry -> entry.isHint() && entry.getType().equals("image")); + + boolean hasUpdates = redactionChangeLog != null && redactionChangeLog.getRedactionLogEntry() != null && !redactionChangeLog + .getRedactionLogEntry() + .isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive")); + + return AnalyzeResult.builder() + .numberOfPages(pageCount) + .hasHints(hasHints) + .hasRedactions(hasRedactions) + .hasRequests(hasRequests) + .hasImages(hasImages) + .hasUpdates(hasUpdates).build(); + } +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java index 9af17d2f..dd0d1b75 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java @@ -1,14 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.awt.Color; -import java.io.IOException; -import java.util.ArrayList; -import java.util.GregorianCalendar; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - +import com.iqser.red.service.redaction.v1.model.*; +import lombok.RequiredArgsConstructor; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -21,15 +14,14 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; import org.springframework.stereotype.Service; -import com.iqser.red.service.redaction.v1.model.CellRectangle; -import com.iqser.red.service.redaction.v1.model.Comment; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import com.iqser.red.service.redaction.v1.model.RedactionLog; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.SectionGrid; -import com.iqser.red.service.redaction.v1.model.SectionRectangle; - -import lombok.RequiredArgsConstructor; +import java.awt.Color; +import java.io.IOException; +import java.util.ArrayList; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; @Service @RequiredArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java index 8fd948b1..4da063a7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DictionaryService.java @@ -1,20 +1,5 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.awt.Color; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.SerializationUtils; -import org.springframework.stereotype.Service; - import com.iqser.red.service.configuration.v1.api.model.Colors; import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; import com.iqser.red.service.configuration.v1.api.model.TypeResponse; @@ -25,10 +10,16 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation; - import feign.FeignException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.SerializationUtils; +import org.springframework.stereotype.Service; + +import java.awt.Color; +import java.util.*; +import java.util.stream.Collectors; @Slf4j @Service @@ -37,7 +28,7 @@ public class DictionaryService { private final DictionaryClient dictionaryClient; - private Map dictionariesByRuleSets = new HashMap<>(); + private final Map dictionariesByRuleSets = new HashMap<>(); public long updateDictionary(String ruleSetId) { @@ -212,4 +203,4 @@ public class DictionaryService { return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor(); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index 7b8233b5..708efa12 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -1,11 +1,10 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; - +import com.iqser.red.service.configuration.v1.api.model.RulesResponse; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; +import com.iqser.red.service.redaction.v1.server.redaction.model.Section; +import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.StringUtils; import org.kie.api.KieServices; import org.kie.api.builder.KieBuilder; @@ -15,12 +14,11 @@ import org.kie.api.runtime.KieContainer; import org.kie.api.runtime.KieSession; import org.springframework.stereotype.Service; -import com.iqser.red.service.configuration.v1.api.model.RulesResponse; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException; -import com.iqser.red.service.redaction.v1.server.redaction.model.Section; - -import lombok.RequiredArgsConstructor; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; @Service @RequiredArgsConstructor @@ -28,9 +26,9 @@ public class DroolsExecutionService { private final RulesClient rulesClient; - private Map kieContainers = new HashMap<>(); + private final Map kieContainers = new HashMap<>(); - private Map rulesVersionPerRuleSetId = new HashMap<>(); + private final Map rulesVersionPerRuleSetId = new HashMap<>(); public KieContainer getKieContainer(String ruleSetId) { @@ -133,4 +131,4 @@ public class DroolsExecutionService { return rulesVersion.longValue(); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index b26d2111..89ccf4a4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -1,50 +1,27 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.kie.api.runtime.KieContainer; -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; import com.iqser.red.service.redaction.v1.model.ManualRedactions; import com.iqser.red.service.redaction.v1.model.Point; import com.iqser.red.service.redaction.v1.model.Rectangle; import com.iqser.red.service.redaction.v1.model.SectionArea; -import com.iqser.red.service.redaction.v1.model.SectionText; -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Footer; -import com.iqser.red.service.redaction.v1.server.classification.model.Header; -import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText; -import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; +import com.iqser.red.service.redaction.v1.server.classification.model.*; import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel; -import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.Image; -import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; -import com.iqser.red.service.redaction.v1.server.redaction.model.Section; -import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair; +import com.iqser.red.service.redaction.v1.server.redaction.model.*; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.kie.api.runtime.KieContainer; +import org.springframework.stereotype.Service; + +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; @Slf4j @Service diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java index 94dc3a94..6682a23d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ImageClassificationService.java @@ -1,21 +1,18 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import javax.imageio.ImageIO; - -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse; import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile; import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType; import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import javax.imageio.ImageIO; +import java.io.ByteArrayOutputStream; +import java.io.IOException; @Slf4j @Service diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java index 5ea56c79..e25cf0ff 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ReanalyzeService.java @@ -1,53 +1,29 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.awt.geom.Rectangle2D; -import java.io.ByteArrayInputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; - +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.redaction.v1.model.*; +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; +import com.iqser.red.service.redaction.v1.server.classification.model.Text; +import com.iqser.red.service.redaction.v1.server.exception.RedactionException; +import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; +import com.iqser.red.service.redaction.v1.server.redaction.model.*; +import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; +import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.kie.api.runtime.KieContainer; import org.springframework.stereotype.Service; import org.springframework.web.bind.annotation.RequestBody; -import com.iqser.red.service.redaction.v1.model.Comment; -import com.iqser.red.service.redaction.v1.model.IdRemoval; -import com.iqser.red.service.redaction.v1.model.ManualForceRedact; -import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; -import com.iqser.red.service.redaction.v1.model.ManualRedactions; -import com.iqser.red.service.redaction.v1.model.ReanalyzeResult; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.RenalyzeRequest; -import com.iqser.red.service.redaction.v1.model.SectionArea; -import com.iqser.red.service.redaction.v1.model.SectionText; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.exception.RedactionException; -import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; -import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; -import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement; -import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; -import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.Image; -import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection; -import com.iqser.red.service.redaction.v1.server.redaction.model.Section; -import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair; -import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; - -import lombok.RequiredArgsConstructor; +import java.awt.geom.Rectangle2D; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; +@Slf4j @Service @RequiredArgsConstructor public class ReanalyzeService { @@ -57,13 +33,63 @@ public class ReanalyzeService { private final SurroundingWordsService surroundingWordsService; private final EntityRedactionService entityRedactionService; private final RedactionLogCreatorService redactionLogCreatorService; + private final RedactionStorageService redactionStorageService; + private final PdfSegmentationService pdfSegmentationService; + private final ImageClassificationService imageClassificationService; + private final RedactionChangeLogService redactionChangeLogService; + private final AnalyzeResponseService analyzeResponseService; + + public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) { + var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN)); + + var pageCount = 0; + Document classifiedDoc; + try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) { + pdDocument.setAllSecurityToBeRemoved(true); + pageCount = pdDocument.getNumberOfPages(); + classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); + } catch (Exception e) { + throw new RedactionException(e); + } + log.info("Document structure analysis successful, starting redaction analysis..."); + + imageClassificationService.classifyImages(classifiedDoc); + entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions()); + imageClassificationService.classifyImages(classifiedDoc); + redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest + .getRuleSetId()); + + log.info("Redaction analysis successful..."); - public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) { + var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc + .getRulesVersion(), analyzeRequest.getRuleSetId()); - DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), renalyzeRequest - .getRedactionLog() - .getDictionaryVersion()); + // first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to loose changeLog data + var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog); + // store redactionLog + redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); + redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc.getSectionText())); + redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid()); + + return analyzeResponseService.createAnalyzeResponse(pageCount, redactionLog, changeLog); + } + + public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest renalyzeRequest) { + var text = redactionStorageService.getText(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId()); + // new procedure was not applied, we need a complete analysis + if (text.getNumberOfPages() == 0) { + return analyze(AnalyzeRequest.builder() + .ruleSetId(renalyzeRequest.getRuleSetId()) + .manualRedactions(renalyzeRequest.getManualRedactions()) + .projectId(renalyzeRequest.getProjectId()) + .fileId(renalyzeRequest.getFileId()) + .build()); + } + var redactionLog = redactionStorageService.getRedactionLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId()); + + + DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), redactionLog.getDictionaryVersion()); Set manualForceAndRemoveIds = getForceAndRemoveIds(renalyzeRequest.getManualRedactions()); Map> comments = null; @@ -75,21 +101,21 @@ public class ReanalyzeService { manualAdds = renalyzeRequest.getManualRedactions().getEntriesToAdd(); } - Set sectionsToReanaylse = new HashSet<>(); + Set sectionsToReanalyse = new HashSet<>(); Map> imageEntries = new HashMap<>(); - for (RedactionLogEntry entry : renalyzeRequest.getRedactionLog().getRedactionLogEntry()) { + for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) { if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) { - sectionsToReanaylse.add(entry.getSectionNumber()); + sectionsToReanalyse.add(entry.getSectionNumber()); } if (entry.isImage() || entry.getType().equals("image")) { imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry)); } } - for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) { + for (SectionText sectionText : text.getSectionTexts()) { if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) { - sectionsToReanaylse.add(sectionText.getSectionNumber()); + sectionsToReanalyse.add(sectionText.getSectionNumber()); } if (manualAdds != null) { @@ -106,97 +132,30 @@ public class ReanalyzeService { } } - if (sectionsToReanaylse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) { - renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion()); - return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build(); + if (sectionsToReanalyse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) { + redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion()); + var changeLog = redactionChangeLogService.createAndStoreChangeLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), redactionLog); + redactionStorageService.storeObject(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); + return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog); } - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(renalyzeRequest.getDocument()))) { + try { - List reanalysisSections = new ArrayList<>(); - for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) { + List reanalysisSections = new ArrayList<>(); + for (SectionText sectionText : text.getSectionTexts()) { - if (!sectionsToReanaylse.contains(sectionText.getSectionNumber())) { - continue; + if (sectionsToReanalyse.contains(sectionText.getSectionNumber())) { + reanalysisSections.add(sectionText); } - ReanalysisSection reanalysisSection = new ReanalysisSection(); - reanalysisSection.setHeadline(sectionText.getHeadline()); - reanalysisSection.setSectionNumber(sectionText.getSectionNumber()); - List textBlocks = new ArrayList<>(); - - Map> sectionAreasPerPage = new HashMap<>(); - for (SectionArea sectionArea : sectionText.getSectionAreas()) { - sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>()) - .add(sectionArea); - } - - Map tabularData = new HashMap<>(); - List cellStarts = new ArrayList<>(); - for (Integer page : sectionAreasPerPage.keySet()) { - List areasOnPage = sectionAreasPerPage.get(page); - - PDPage pdPage = pdDocument.getPage(page - 1); - PDRectangle cropBox = pdPage.getCropBox(); - PDFAreaTextStripper textStripper = new PDFAreaTextStripper(); - textStripper.setPageNumber(page); - - int cellStart = 0; - for (SectionArea sectionArea : areasOnPage) { - - Rectangle2D rect = null; - if (pdPage.getRotation() == 90) { - rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft() - .getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f); - } else { - rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft() - .getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea - .getHeight() + 0.001f); - } - - textStripper.addRegion(String.valueOf(1), rect); - textStripper.extractRegions(pdPage); - textStripper.getTextForRegion(String.valueOf(1)); - List positions = textStripper.getTextPositionSequences(); - - TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft() - .getX() + sectionArea.getWidth(), sectionArea.getTopLeft() - .getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0); - - if (sectionText.isTable()) { - Cell cell = new Cell(); - cell.addTextBlock(textBlock); - tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart)); - cellStarts.add(cellStart); - cellStart = cellStart + cell.toString().trim().length() + 1; - } - - textBlocks.add(textBlock); - textStripper.clearPositions(); - } - - } - reanalysisSection.setTextBlocks(textBlocks); - reanalysisSection.setTabularData(tabularData); - - if (sectionText.isTable()) { - reanalysisSection.setCellStarts(cellStarts); - } - if (imageEntries.containsKey(sectionText.getSectionNumber())) { - reanalysisSection.getImages().addAll(imageEntries.get(sectionText.getSectionNumber())); - } - - reanalysisSections.add(reanalysisSection); } - //-- - KieContainer kieContainer = droolsExecutionService.updateRules(renalyzeRequest.getRuleSetId()); Dictionary dictionary = dictionaryService.getDeepCopyDictionary(renalyzeRequest.getRuleSetId()); List sectionSearchableTextPairs = new ArrayList<>(); - for (ReanalysisSection reanalysisSection : reanalysisSections) { + for (SectionText reanalysisSection : reanalysisSections) { Set entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection .getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false); @@ -254,7 +213,7 @@ public class ReanalyzeService { } List newRedactionLogEntries = new ArrayList<>(); - for (int page = 1; page <= pdDocument.getNumberOfPages(); page++) { + for (int page = 1; page <= text.getNumberOfPages(); page++) { if (entitiesPerPage.get(page) != null) { newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest .getManualRedactions(), page, renalyzeRequest.getRuleSetId())); @@ -269,19 +228,14 @@ public class ReanalyzeService { .getRuleSetId())); } - Iterator itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator(); - while (itty.hasNext()) { - RedactionLogEntry entry = itty.next(); - if (sectionsToReanaylse.contains(entry.getSectionNumber())) { - itty.remove(); - } - } + redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()); + redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries); + redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion()); - renalyzeRequest.getRedactionLog().getRedactionLogEntry().addAll(newRedactionLogEntries); + var changeLog = redactionChangeLogService.createAndStoreChangeLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), redactionLog); + redactionStorageService.storeObject(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog); + return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog); - renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion()); - - return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build(); } catch (Exception e) { throw new RedactionException(e); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java new file mode 100644 index 00000000..e89b3052 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionChangeLogService.java @@ -0,0 +1,94 @@ +package com.iqser.red.service.redaction.v1.server.redaction.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.file.management.v1.api.model.ChangeType; +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLog; +import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLogEntry; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +@Slf4j +@Service +@RequiredArgsConstructor +public class RedactionChangeLogService { + + private final RedactionStorageService storageStorageService; + private final ObjectMapper objectMapper; + + public RedactionChangeLog createAndStoreChangeLog(String projectId, String fileId, RedactionLog currentRedactionLog) { + + try { + RedactionLog previousRedactionLog = storageStorageService.getRedactionLog(projectId, fileId); + var changeLog = createChangeLog(currentRedactionLog, previousRedactionLog); + storageStorageService.storeObject(projectId, fileId, FileType.REDACTION_CHANGELOG, objectMapper.writeValueAsBytes(changeLog)); + return changeLog; + } catch (Exception e) { + log.debug("Previous redaction log not available"); + return null; + } + + } + + + private RedactionChangeLog createChangeLog(RedactionLog currentRedactionLog, RedactionLog previousRedactionLog) { + + + if (previousRedactionLog == null) { + return null; + } + + List added = new ArrayList<>(currentRedactionLog.getRedactionLogEntry()); + added.removeAll(previousRedactionLog.getRedactionLogEntry()); + + List removed = new ArrayList<>(previousRedactionLog.getRedactionLogEntry()); + removed.removeAll(currentRedactionLog.getRedactionLogEntry()); + + List changeLogEntries = added.stream() + .map(entry -> convert(entry, ChangeType.ADDED)) + .collect(Collectors.toList()); + changeLogEntries.addAll(removed.stream() + .map(entry -> convert(entry, ChangeType.REMOVED)) + .collect(Collectors.toList())); + + return new RedactionChangeLog(changeLogEntries, currentRedactionLog.getDictionaryVersion(), currentRedactionLog.getRulesVersion(), currentRedactionLog + .getRuleSetId(), currentRedactionLog.getFilename()); + } + + + private RedactionChangeLogEntry convert(RedactionLogEntry entry, ChangeType changeType) { + + return RedactionChangeLogEntry.builder() + .id(entry.getId()) + .type(entry.getType()) + .value(entry.getValue()) + .reason(entry.getReason()) + .matchedRule(entry.getMatchedRule()) + .legalBasis(entry.getLegalBasis()) + .redacted(entry.isRedacted()) + .isHint(entry.isHint()) + .isRecommendation(entry.isRecommendation()) + .section(entry.getSection()) + .color(entry.getColor()) + .positions(entry.getPositions()) + .sectionNumber(entry.getSectionNumber()) + .manual(entry.isManual()) + .status(entry.getStatus()) + .manualRedactionType(entry.getManualRedactionType()) + .isDictionaryEntry(entry.isDictionaryEntry()) + .textBefore(entry.getTextBefore()) + .textAfter(entry.getTextAfter()) + .comments(entry.getComments()) + .changeType(changeType) + .build(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 44a8941a..a046fb08 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,28 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.commons.collections4.CollectionUtils; -import org.apache.pdfbox.text.TextPosition; -import org.springframework.stereotype.Service; - -import com.iqser.red.service.redaction.v1.model.CellRectangle; -import com.iqser.red.service.redaction.v1.model.Comment; -import com.iqser.red.service.redaction.v1.model.IdRemoval; -import com.iqser.red.service.redaction.v1.model.ManualForceRedact; -import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; -import com.iqser.red.service.redaction.v1.model.ManualRedactionType; -import com.iqser.red.service.redaction.v1.model.ManualRedactions; -import com.iqser.red.service.redaction.v1.model.Point; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.SectionRectangle; -import com.iqser.red.service.redaction.v1.model.Status; +import com.iqser.red.service.redaction.v1.model.*; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; @@ -34,8 +12,17 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; - import lombok.RequiredArgsConstructor; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.pdfbox.text.TextPosition; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; @Service @RequiredArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java index f7fffaa4..91438cf4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/EntitySearchUtils.java @@ -1,25 +1,17 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary; import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText; - import lombok.experimental.UtilityClass; import lombok.extern.slf4j.Slf4j; +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + @Slf4j @UtilityClass @SuppressWarnings("PMD") @@ -46,7 +38,7 @@ public class EntitySearchUtils { if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString .charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) { - if(value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())){ + if (value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())) { return true; } } @@ -147,16 +139,16 @@ public class EntitySearchUtils { public void addEntitiesWithHigherRank(Set entities, Entity found, Dictionary dictionary) { - if(entities.contains(found)){ + if (entities.contains(found)) { Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get(); - if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())){ + if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) { entities.remove(found); } } entities.add(found); } - public void addEntitiesIgnoreRank(Set entities, Set found){ + public void addEntitiesIgnoreRank(Set entities, Set found) { // HashSet keeps old value but we want the new. entities.removeAll(found); entities.addAll(found); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java index ce3c7540..34a712fe 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java @@ -1,15 +1,14 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; -import java.awt.geom.Rectangle2D; -import java.nio.charset.StandardCharsets; -import java.util.List; - import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; - import lombok.experimental.UtilityClass; +import java.awt.geom.Rectangle2D; +import java.nio.charset.StandardCharsets; +import java.util.List; + @UtilityClass public class IdBuilder { @@ -26,7 +25,7 @@ public class IdBuilder { } - public String buildId(Rectangle2D rectangle2D, int page){ + public String buildId(Rectangle2D rectangle2D, int page) { StringBuilder sb = new StringBuilder(); sb.append("x").append(rectangle2D.getX()).append("y").append(rectangle2D.getY()).append("h").append(rectangle2D.getHeight()).append("w").append(rectangle2D.getWidth()).append("p").append(page); @@ -35,5 +34,4 @@ public class IdBuilder { } - } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java index 5fc33bbd..accd2f30 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/ResourceLoader.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; +import lombok.experimental.UtilityClass; + import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -8,8 +10,6 @@ import java.nio.charset.StandardCharsets; import java.util.Set; import java.util.stream.Collectors; -import lombok.experimental.UtilityClass; - @UtilityClass public class ResourceLoader { @@ -27,4 +27,4 @@ public class ResourceLoader { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java index 374eebd3..b740286b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java @@ -7,6 +7,7 @@ public class TextNormalizationUtilities { /** * Revert hyphenation due to line breaks. + * * @param text Text to be processed. * @return Text without line-break hyphenation. */ @@ -14,4 +15,4 @@ public class TextNormalizationUtilities { return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R|\n\r(.+ )", "$1$2"); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 5d88c0cb..77e088a5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -1,28 +1,36 @@ package com.iqser.red.service.redaction.v1.server.segmentation; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - +import com.iqser.red.service.redaction.v1.model.SectionArea; +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.model.Page; +import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; +import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; +import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; +import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService; +import com.iqser.red.service.redaction.v1.server.exception.RedactionException; +import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper; +import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; +import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; +import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.springframework.stereotype.Service; -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Page; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; -import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService; -import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; -import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; -import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; - -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; @Slf4j @Service @@ -36,6 +44,82 @@ public class PdfSegmentationService { private final SectionsBuilderService sectionsBuilderService; + private final RedactionStorageService redactionStorageService; + + + private void postProcessSections(PDDocument pdDocument, List texts) { + + try { + for (SectionText sectionText : texts) { + + List textBlocks = new ArrayList<>(); + + Map> sectionAreasPerPage = new HashMap<>(); + for (SectionArea sectionArea : sectionText.getSectionAreas()) { + sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>()) + .add(sectionArea); + } + + Map tabularData = new HashMap<>(); + List cellStarts = new ArrayList<>(); + for (Integer page : sectionAreasPerPage.keySet()) { + List areasOnPage = sectionAreasPerPage.get(page); + + PDPage pdPage = pdDocument.getPage(page - 1); + PDRectangle cropBox = pdPage.getCropBox(); + PDFAreaTextStripper textStripper = new PDFAreaTextStripper(); + textStripper.setPageNumber(page); + + int cellStart = 0; + for (SectionArea sectionArea : areasOnPage) { + + Rectangle2D rect = null; + if (pdPage.getRotation() == 90) { + rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft() + .getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f); + } else { + rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft() + .getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea + .getHeight() + 0.001f); + } + + textStripper.addRegion(String.valueOf(1), rect); + textStripper.extractRegions(pdPage); + textStripper.getTextForRegion(String.valueOf(1)); + List positions = textStripper.getTextPositionSequences(); + + TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft() + .getX() + sectionArea.getWidth(), sectionArea.getTopLeft() + .getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0); + + if (sectionText.isTable()) { + Cell cell = new Cell(); + cell.addTextBlock(textBlock); + tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart)); + cellStarts.add(cellStart); + cellStart = cellStart + cell.toString().trim().length() + 1; + } + + textBlocks.add(textBlock); + textStripper.clearPositions(); + } + + } + sectionText.setTextBlocks(textBlocks); + sectionText.setTabularData(tabularData); + if (sectionText.isTable()) { + sectionText.setCellStarts(cellStarts); + } + } + + + } catch (Exception e) { + throw new RedactionException(e); + } + + } + + public Document parseDocument(PDDocument pdDocument) throws IOException { Document document = new Document(); @@ -82,6 +166,9 @@ public class PdfSegmentationService { sectionsBuilderService.buildSections(document); sectionsBuilderService.addImagesToSections(document); + // This can be improved an done in one pass, but it's complicated to do right away + postProcessSections(pdDocument, document.getSectionText()); + return document; } @@ -116,4 +203,4 @@ public class PdfSegmentationService { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java index 68d5574e..57da945f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java @@ -1,29 +1,15 @@ package com.iqser.red.service.redaction.v1.server.segmentation; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.stream.Collectors; - -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Service; - -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Footer; -import com.iqser.red.service.redaction.v1.server.classification.model.Header; -import com.iqser.red.service.redaction.v1.server.classification.model.Page; -import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText; +import com.iqser.red.service.redaction.v1.server.classification.model.*; import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import org.apache.commons.collections4.CollectionUtils; +import org.springframework.stereotype.Service; + +import java.util.*; +import java.util.stream.Collectors; @Service public class SectionsBuilderService { @@ -302,4 +288,4 @@ public class SectionsBuilderService { } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java index eb57a0f6..f7c9f894 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/settings/RedactionServiceSettings.java @@ -1,17 +1,16 @@ package com.iqser.red.service.redaction.v1.server.settings; -import org.springframework.boot.context.properties.ConfigurationProperties; - import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; @Data @ConfigurationProperties("redaction-service") public class RedactionServiceSettings { - + private int numberOfSurroundingWords = 3; private int surroundingWordsOffsetWindow = 100; private boolean enableImageClassification = true; -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java new file mode 100644 index 00000000..c9792c0f --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -0,0 +1,109 @@ +package com.iqser.red.service.redaction.v1.server.storage; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.redaction.v1.model.RedactionLog; +import com.iqser.red.service.redaction.v1.model.SectionGrid; +import com.iqser.red.service.redaction.v1.server.classification.model.Text; +import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist; +import com.iqser.red.storage.commons.service.StorageService; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.springframework.core.io.InputStreamResource; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.io.InputStream; + +@Slf4j +@Service +@RequiredArgsConstructor +public class RedactionStorageService { + + private final ObjectMapper objectMapper; + private final StorageService storageService; + + @SneakyThrows + public InputStream getStoredObject(String storageId) { + return storageService.getObject(storageId).getInputStream(); + } + + + @SneakyThrows + public void storeObject(String projectId, String fileId, FileType fileType, Object any) { + storageService.storeObject(StorageIdUtils.getStorageId(projectId, fileId, fileType), objectMapper.writeValueAsBytes(any)); + } + + + public RedactionLog getRedactionLog(String projectId, String fileId) { + + InputStreamResource inputStreamResource; + try { + inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.REDACTION_LOG)); + } catch (StorageObjectDoesNotExist e) { + log.debug("Text not available."); + return null; + } + + try { + return objectMapper.readValue(inputStreamResource.getInputStream(), RedactionLog.class); + } catch (IOException e) { + throw new RuntimeException("Could not convert Text", e); + } + } + + + public Text getText(String projectId, String fileId) { + + InputStreamResource inputStreamResource; + try { + inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.TEXT)); + } catch (StorageObjectDoesNotExist e) { + log.debug("Text not available."); + return null; + } + + try { + return objectMapper.readValue(inputStreamResource.getInputStream(), Text.class); + } catch (IOException e) { + throw new RuntimeException("Could not convert Text", e); + } + } + + + public SectionGrid getSectionGrid(String projectId, String fileId) { + + var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.SECTION_GRID)); + try { + return objectMapper.readValue(sectionGrid.getInputStream(), SectionGrid.class); + } catch (IOException e) { + throw new RuntimeException("Could not convert RedactionLog", e); + } + } + + + @RequiredArgsConstructor + public enum StorageType { + PARSED_DOCUMENT(".json"); + + @Getter + private final String extension; + + } + + public static class StorageIdUtils { + + public static String getStorageId(String projectId, String fileId, FileType fileType) { + return projectId + "/" + fileId + "." + fileType.name() + fileType.getExtension(); + } + + + public static String getStorageId(String userId, String projectId, String filename) { + + return userId + "/" + projectId + "/" + filename; + } + + } +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java index faa2b690..2f6183ab 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/AbstractTextContainer.java @@ -1,7 +1,6 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; import com.iqser.red.service.redaction.v1.model.Rectangle; - import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; @@ -25,7 +24,7 @@ public abstract class AbstractTextContainer { } public boolean contains(Rectangle other) { - return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight(); + return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight(); } public float getHeight() { @@ -36,4 +35,4 @@ public abstract class AbstractTextContainer { return maxX - minX; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java index b6efcb3f..e14f5da0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java @@ -1,18 +1,17 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; +import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + import java.awt.geom.Point2D; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; - @SuppressWarnings("serial") @Data @EqualsAndHashCode(callSuper = true) @@ -71,7 +70,4 @@ public class Cell extends Rectangle { } - - - -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/CleanRulings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/CleanRulings.java index f1a72a20..73fb9e13 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/CleanRulings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/CleanRulings.java @@ -1,10 +1,10 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; -import java.util.List; - import lombok.Builder; import lombok.Data; +import java.util.List; + @Data @Builder public class CleanRulings { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Rectangle.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Rectangle.java index d897840f..2b1b7509 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Rectangle.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Rectangle.java @@ -8,170 +8,171 @@ import java.util.List; @SuppressWarnings("all") public class Rectangle extends Rectangle2D.Float { - /** - * Ill-defined comparator, from when Rectangle was Comparable. - * - * see https://github.com/tabulapdf/tabula-java/issues/116 - * @deprecated with no replacement - */ - @Deprecated - public static final Comparator ILL_DEFINED_ORDER = new Comparator() { - @Override public int compare(Rectangle o1, Rectangle o2) { - if (o1.equals(o2)) return 0; - if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { - return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 - ? - java.lang.Double.compare(o1.getX(), o2.getX()) - : java.lang.Double.compare(o1.getX(), o2.getX()); - } else { - return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); - } - } - }; - - protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; + protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; + /** + * Ill-defined comparator, from when Rectangle was Comparable. + *

+ * see https://github.com/tabulapdf/tabula-java/issues/116 + * + * @deprecated with no replacement + */ + @Deprecated + public static final Comparator ILL_DEFINED_ORDER = new Comparator() { + @Override + public int compare(Rectangle o1, Rectangle o2) { + if (o1.equals(o2)) return 0; + if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { + return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 + ? -java.lang.Double.compare(o1.getX(), o2.getX()) + : java.lang.Double.compare(o1.getX(), o2.getX()); + } else { + return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); + } + } + }; - public Rectangle() { - super(); - } + public Rectangle() { + super(); + } - public Rectangle(float top, float left, float width, float height) { - super(); - this.setRect(left, top, width, height); - } + public Rectangle(float top, float left, float width, float height) { + super(); + this.setRect(left, top, width, height); + } - public int compareTo(Rectangle other) { - return ILL_DEFINED_ORDER.compare(this, other); - } + /** + * @param rectangles + * @return minimum bounding box that contains all the rectangles + */ + public static Rectangle boundingBoxOf(List rectangles) { + float minx = java.lang.Float.MAX_VALUE; + float miny = java.lang.Float.MAX_VALUE; + float maxx = java.lang.Float.MIN_VALUE; + float maxy = java.lang.Float.MIN_VALUE; - // I'm bad at Java and need this for fancy sorting in - // technology.tabula.TextChunk. - public int isLtrDominant() { - return 0; - } + for (Rectangle r : rectangles) { + minx = (float) Math.min(r.getMinX(), minx); + miny = (float) Math.min(r.getMinY(), miny); + maxx = (float) Math.max(r.getMaxX(), maxx); + maxy = (float) Math.max(r.getMaxY(), maxy); + } + return new Rectangle(miny, minx, maxx - minx, maxy - miny); + } - public float getArea() { - return this.width * this.height; - } + public int compareTo(Rectangle other) { + return ILL_DEFINED_ORDER.compare(this, other); + } - public float verticalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - } + // I'm bad at Java and need this for fancy sorting in + // technology.tabula.TextChunk. + public int isLtrDominant() { + return 0; + } - public boolean verticallyOverlaps(Rectangle other) { - return verticalOverlap(other) > 0; - } + public float getArea() { + return this.width * this.height; + } - public float horizontalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - } + public float verticalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + } - public boolean horizontallyOverlaps(Rectangle other) { - return horizontalOverlap(other) > 0; - } + public boolean verticallyOverlaps(Rectangle other) { + return verticalOverlap(other) > 0; + } - public float verticalOverlapRatio(Rectangle other) { - float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); + public float horizontalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + } - if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() - && other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - this.getTop()) / delta; - } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() - && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - other.getTop()) / delta; - } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() - && other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - other.getTop()) / delta; - } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() - && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - this.getTop()) / delta; - } + public boolean horizontallyOverlaps(Rectangle other) { + return horizontalOverlap(other) > 0; + } - return rv; + public float verticalOverlapRatio(Rectangle other) { + float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); - } + if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - this.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - other.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - other.getTop()) / delta; + } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - this.getTop()) / delta; + } - public float overlapRatio(Rectangle other) { - double intersectionWidth = Math.max(0, - Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - double intersectionHeight = Math.max(0, - Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); - double unionArea = this.getArea() + other.getArea() - intersectionArea; + return rv; - return (float) (intersectionArea / unionArea); - } + } - public Rectangle merge(Rectangle other) { - this.setRect(this.createUnion(other)); - return this; - } + public float overlapRatio(Rectangle other) { + double intersectionWidth = Math.max(0, + Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + double intersectionHeight = Math.max(0, + Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); + double unionArea = this.getArea() + other.getArea() - intersectionArea; - public float getTop() { - return (float) this.getMinY(); - } + return (float) (intersectionArea / unionArea); + } - public void setTop(float top) { - float deltaHeight = top - this.y; - this.setRect(this.x, top, this.width, this.height - deltaHeight); - } + public Rectangle merge(Rectangle other) { + this.setRect(this.createUnion(other)); + return this; + } - public float getRight() { - return (float) this.getMaxX(); - } + public float getTop() { + return (float) this.getMinY(); + } - public void setRight(float right) { - this.setRect(this.x, this.y, right - this.x, this.height); - } + public void setTop(float top) { + float deltaHeight = top - this.y; + this.setRect(this.x, top, this.width, this.height - deltaHeight); + } - public float getLeft() { - return (float) this.getMinX(); - } + public float getRight() { + return (float) this.getMaxX(); + } - public void setLeft(float left) { - float deltaWidth = left - this.x; - this.setRect(left, this.y, this.width - deltaWidth, this.height); - } + public void setRight(float right) { + this.setRect(this.x, this.y, right - this.x, this.height); + } - public float getBottom() { - return (float) this.getMaxY(); - } + public float getLeft() { + return (float) this.getMinX(); + } - public void setBottom(float bottom) { - this.setRect(this.x, this.y, this.width, bottom - this.y); - } + public void setLeft(float left) { + float deltaWidth = left - this.x; + this.setRect(left, this.y, this.width - deltaWidth, this.height); + } - public Point2D[] getPoints() { - return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()), - new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), - new Point2D.Float(this.getLeft(), this.getBottom()) }; - } + public float getBottom() { + return (float) this.getMaxY(); + } - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); - return sb.toString(); - } + public void setBottom(float bottom) { + this.setRect(this.x, this.y, this.width, bottom - this.y); + } - /** - * @param rectangles - * @return minimum bounding box that contains all the rectangles - */ - public static Rectangle boundingBoxOf(List rectangles) { - float minx = java.lang.Float.MAX_VALUE; - float miny = java.lang.Float.MAX_VALUE; - float maxx = java.lang.Float.MIN_VALUE; - float maxy = java.lang.Float.MIN_VALUE; + public Point2D[] getPoints() { + return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()), + new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), + new Point2D.Float(this.getLeft(), this.getBottom())}; + } - for (Rectangle r : rectangles) { - minx = (float) Math.min(r.getMinX(), minx); - miny = (float) Math.min(r.getMinY(), miny); - maxx = (float) Math.max(r.getMaxX(), maxx); - maxy = (float) Math.max(r.getMaxY(), maxy); - } - return new Rectangle(miny, minx, maxx - minx, maxy - miny); - } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); + return sb.toString(); + } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java index 79f08ec4..404b66e9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java @@ -1,12 +1,11 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; -import java.util.ArrayList; -import java.util.List; - +import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.index.strtree.STRtree; -import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; +import java.util.ArrayList; +import java.util.List; @SuppressWarnings("all") public class RectangleSpatialIndex { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java index 98e3b300..e90c52b2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java @@ -1,20 +1,13 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; +import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping; +import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; +import lombok.extern.slf4j.Slf4j; + import java.awt.geom.Line2D; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Formatter; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping; -import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; - -import lombok.extern.slf4j.Slf4j; +import java.util.*; @Slf4j @@ -23,13 +16,127 @@ public class Ruling extends Line2D.Float { private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2; - private enum SOType {VERTICAL, HRIGHT, HLEFT} - - public Ruling(Point2D p1, Point2D p2) { super(p1, p2); } + public static List cropRulingsToArea(List rulings, Rectangle2D area) { + ArrayList rv = new ArrayList<>(); + for (Ruling r : rulings) { + if (r.intersects(area)) { + rv.add(r.intersect(area)); + } + } + return rv; + } + + // log(n) implementation of find_intersections + // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf + public static Map findIntersections(List horizontals, List verticals) { + + class SortObject { + protected SOType type; + protected float position; + protected Ruling ruling; + + public SortObject(SOType type, float position, Ruling ruling) { + this.type = type; + this.position = position; + this.ruling = ruling; + } + } + + List sos = new ArrayList<>(); + + TreeMap tree = new TreeMap<>(new Comparator() { + @Override + public int compare(Ruling o1, Ruling o2) { + return java.lang.Double.compare(o1.getTop(), o2.getTop()); + } + }); + + TreeMap rv = new TreeMap<>(new Comparator() { + @Override + public int compare(Point2D o1, Point2D o2) { + if (o1.getY() > o2.getY()) { + return 1; + } + if (o1.getY() < o2.getY()) { + return -1; + } + if (o1.getX() > o2.getX()) { + return 1; + } + if (o1.getX() < o2.getX()) { + return -1; + } + return 0; + } + }); + + for (Ruling h : horizontals) { + sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); + sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); + } + + for (Ruling v : verticals) { + sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v)); + } + + Collections.sort(sos, new Comparator() { + @Override + public int compare(SortObject a, SortObject b) { + int rv; + if (Utils.feq(a.position, b.position)) { + if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) { + rv = 1; + } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) { + rv = -1; + } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) { + rv = -1; + } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) { + rv = 1; + } else { + rv = java.lang.Double.compare(a.position, b.position); + } + } else { + return java.lang.Double.compare(a.position, b.position); + } + return rv; + } + }); + + for (SortObject so : sos) { + switch (so.type) { + case VERTICAL: + for (Map.Entry h : tree.entrySet()) { + try { + Point2D i = h.getKey().intersectionPoint(so.ruling); + if (i == null) { + continue; + } + rv.put(i, + new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), + so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)}); + } catch (UnsupportedOperationException e) { + log.info("Some line are oblique, ignoring..."); + continue; + } + } + break; + case HRIGHT: + tree.remove(so.ruling); + break; + case HLEFT: + tree.put(so.ruling, true); + break; + } + } + + return rv; + + } + public boolean vertical() { return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD; } @@ -38,13 +145,13 @@ public class Ruling extends Line2D.Float { return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD; } + // attributes that make sense only for non-oblique lines + // these are used to have a single collapse method (in page, currently) + public boolean oblique() { return !(this.vertical() || this.horizontal()); } - // attributes that make sense only for non-oblique lines - // these are used to have a single collapse method (in page, currently) - public float getPosition() { if (this.oblique()) { throw new UnsupportedOperationException(); @@ -52,7 +159,6 @@ public class Ruling extends Line2D.Float { return this.vertical() ? this.getLeft() : this.getTop(); } - public float getStart() { if (this.oblique()) { throw new UnsupportedOperationException(); @@ -102,12 +208,10 @@ public class Ruling extends Line2D.Float { } } - public boolean perpendicularTo(Ruling other) { return this.vertical() == other.horizontal(); } - public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) { if (this.intersectsLine(another)) { return true; @@ -238,7 +342,6 @@ public class Ruling extends Line2D.Float { return angle; } - @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -248,122 +351,7 @@ public class Ruling extends Line2D.Float { return rv; } - public static List cropRulingsToArea(List rulings, Rectangle2D area) { - ArrayList rv = new ArrayList<>(); - for (Ruling r : rulings) { - if (r.intersects(area)) { - rv.add(r.intersect(area)); - } - } - return rv; - } - - // log(n) implementation of find_intersections - // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf - public static Map findIntersections(List horizontals, List verticals) { - - class SortObject { - protected SOType type; - protected float position; - protected Ruling ruling; - - public SortObject(SOType type, float position, Ruling ruling) { - this.type = type; - this.position = position; - this.ruling = ruling; - } - } - - List sos = new ArrayList<>(); - - TreeMap tree = new TreeMap<>(new Comparator() { - @Override - public int compare(Ruling o1, Ruling o2) { - return java.lang.Double.compare(o1.getTop(), o2.getTop()); - } - }); - - TreeMap rv = new TreeMap<>(new Comparator() { - @Override - public int compare(Point2D o1, Point2D o2) { - if (o1.getY() > o2.getY()) { - return 1; - } - if (o1.getY() < o2.getY()) { - return -1; - } - if (o1.getX() > o2.getX()) { - return 1; - } - if (o1.getX() < o2.getX()) { - return -1; - } - return 0; - } - }); - - for (Ruling h : horizontals) { - sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); - sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); - } - - for (Ruling v : verticals) { - sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v)); - } - - Collections.sort(sos, new Comparator() { - @Override - public int compare(SortObject a, SortObject b) { - int rv; - if (Utils.feq(a.position, b.position)) { - if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) { - rv = 1; - } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) { - rv = -1; - } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) { - rv = -1; - } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) { - rv = 1; - } else { - rv = java.lang.Double.compare(a.position, b.position); - } - } else { - return java.lang.Double.compare(a.position, b.position); - } - return rv; - } - }); - - for (SortObject so : sos) { - switch (so.type) { - case VERTICAL: - for (Map.Entry h : tree.entrySet()) { - try { - Point2D i = h.getKey().intersectionPoint(so.ruling); - if (i == null) { - continue; - } - rv.put(i, - new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), - so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)}); - } catch(UnsupportedOperationException e){ - log.info("Some line are oblique, ignoring..."); - continue; - } - } - break; - case HRIGHT: - tree.remove(so.ruling); - break; - case HLEFT: - tree.put(so.ruling, true); - break; - } - } - - return rv; - - } + private enum SOType {VERTICAL, HRIGHT, HLEFT} } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java index 8f55b482..6abc086e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java @@ -1,22 +1,13 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import org.apache.commons.collections4.CollectionUtils; - import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; - import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; + +import java.util.*; @Slf4j public class Table extends AbstractTextContainer { @@ -24,21 +15,14 @@ public class Table extends AbstractTextContainer { private final TreeMap cells = new TreeMap<>(); private final RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - + private final int rotation; @Getter @Setter private String headline; - private int unrotatedRowCount; - private int unrotatedColCount; - private int rowCount = -1; - private int colCount = -1; - - private final int rotation; - private List> rows; @@ -62,8 +46,8 @@ public class Table extends AbstractTextContainer { // Ignore rows that does not contain any cells and values. List> rowsToRemove = new ArrayList<>(); - for (List row: rows){ - if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()){ + for (List row : rows) { + if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) { rowsToRemove.add(row); } } @@ -110,7 +94,7 @@ public class Table extends AbstractTextContainer { // we move from left to right and top to bottom for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) { List rowCells = rows.get(rowIndex); - if(rowCells.size() == 1){ + if (rowCells.size() == 1) { continue; } @@ -275,7 +259,7 @@ public class Table extends AbstractTextContainer { cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1 - .getBottom(), 2)))); + .getBottom(), 2)))); Iterator iter = cells.iterator(); Cell c = iter.next(); @@ -367,4 +351,4 @@ public class Table extends AbstractTextContainer { return sb.toString(); } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java index 82ca3bb7..6f6ea80a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java @@ -1,19 +1,13 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.service; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; +import org.springframework.stereotype.Service; + +import java.awt.geom.Line2D; +import java.awt.geom.Point2D; +import java.util.*; @Service public class RulingCleaningService { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java index 3dddd34a..682eb03e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java @@ -1,31 +1,57 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.service; -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.*; import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils; +import org.springframework.stereotype.Service; + +import java.awt.geom.Point2D; +import java.util.*; +import java.util.stream.Collectors; @Service public class TableExtractionService { + private static final Comparator X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> { + + int rv = 0; + float arg0X = Utils.round(arg0.getX(), 2); + float arg0Y = Utils.round(arg0.getY(), 2); + float arg1X = Utils.round(arg1.getX(), 2); + float arg1Y = Utils.round(arg1.getY(), 2); + + if (arg0X > arg1X) { + rv = 1; + } else if (arg0X < arg1X) { + rv = -1; + } else if (arg0Y > arg1Y) { + rv = 1; + } else if (arg0Y < arg1Y) { + rv = -1; + } + return rv; + }; + private static final Comparator POINT_COMPARATOR = (arg0, arg1) -> { + + int rv = 0; + float arg0X = Utils.round(arg0.getX(), 2); + float arg0Y = Utils.round(arg0.getY(), 2); + float arg1X = Utils.round(arg1.getX(), 2); + float arg1Y = Utils.round(arg1.getY(), 2); + + if (arg0Y > arg1Y) { + rv = 1; + } else if (arg0Y < arg1Y) { + rv = -1; + } else if (arg0X > arg1X) { + rv = 1; + } else if (arg0X < arg1X) { + rv = -1; + } + return rv; + }; + public void extractTables(CleanRulings cleanRulings, Page page) { List cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical()); @@ -80,7 +106,6 @@ public class TableExtractionService { page.getTextBlocks().removeAll(toBeRemoved); } - public List findCells(List horizontalRulingLines, List verticalRulingLines) { List cellsFound = new ArrayList<>(); @@ -133,7 +158,6 @@ public class TableExtractionService { return cellsFound; } - private List findSpreadsheetsFromCells(List cells) { // via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon List rectangles = new ArrayList<>(); @@ -233,47 +257,6 @@ public class TableExtractionService { return rectangles; } - - private static final Comparator X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> { - - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - if (arg0X > arg1X) { - rv = 1; - } else if (arg0X < arg1X) { - rv = -1; - } else if (arg0Y > arg1Y) { - rv = 1; - } else if (arg0Y < arg1Y) { - rv = -1; - } - return rv; - }; - - private static final Comparator POINT_COMPARATOR = (arg0, arg1) -> { - - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - if (arg0Y > arg1Y) { - rv = 1; - } else if (arg0Y < arg1Y) { - rv = -1; - } else if (arg0X > arg1X) { - rv = 1; - } else if (arg0X < arg1X) { - rv = -1; - } - return rv; - }; - private enum Direction { HORIZONTAL, VERTICAL } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java index d1f9ab06..bd4b9d0c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java @@ -19,21 +19,24 @@ import java.awt.geom.Rectangle2D; * clipping algorithm (line against clip rectangle). */ @SuppressWarnings("all") -public final class CohenSutherlandClipping -{ +public final class CohenSutherlandClipping { + private static final int INSIDE = 0; + private static final int LEFT = 1; + private static final int RIGHT = 2; + private static final int BOTTOM = 4; + private static final int TOP = 8; private double xMin; private double yMin; private double xMax; private double yMax; - /** * Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). */ public CohenSutherlandClipping() { } - /** * Creates a Cohen Sutherland clipper with the given clip rectangle. + * * @param clip the clip rectangle to use */ public CohenSutherlandClipping(Rectangle2D clip) { @@ -42,6 +45,7 @@ public final class CohenSutherlandClipping /** * Sets the clip rectangle. + * * @param clip the clip rectangle */ public void setClip(Rectangle2D clip) { @@ -51,19 +55,13 @@ public final class CohenSutherlandClipping yMax = yMin + clip.getHeight(); } - private static final int INSIDE = 0; - private static final int LEFT = 1; - private static final int RIGHT = 2; - private static final int BOTTOM = 4; - private static final int TOP = 8; - private final int regionCode(double x, double y) { - int code = x < xMin - ? LEFT - : x > xMax + int code = x < xMin + ? LEFT + : x > xMax ? RIGHT : INSIDE; - if (y < yMin) code |= BOTTOM; + if (y < yMin) code |= BOTTOM; else if (y > yMax) code |= TOP; return code; } @@ -71,6 +69,7 @@ public final class CohenSutherlandClipping /** * Clips a given line against the clip rectangle. * The modification (if needed) is done in place. + * * @param line the line to clip * @return true if line is clipped, false if line is * totally outside the clip rect. @@ -87,9 +86,9 @@ public final class CohenSutherlandClipping boolean vertical = p1x == p2x; - double slope = vertical - ? 0d - : (p2y-p1y)/(p2x-p1x); + double slope = vertical + ? 0d + : (p2y - p1y) / (p2x - p1x); int c1 = regionCode(p1x, p1y); int c2 = regionCode(p2x, p2y); @@ -103,31 +102,27 @@ public final class CohenSutherlandClipping if ((c & LEFT) != INSIDE) { qx = xMin; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; - } - else if ((c & RIGHT) != INSIDE) { + qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y; + } else if ((c & RIGHT) != INSIDE) { qx = xMax; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; - } - else if ((c & BOTTOM) != INSIDE) { + qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y; + } else if ((c & BOTTOM) != INSIDE) { qy = yMin; qx = vertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; - } - else if ((c & TOP) != INSIDE) { + ? p1x + : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x; + } else if ((c & TOP) != INSIDE) { qy = yMax; qx = vertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; + ? p1x + : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x; } if (c == c1) { p1x = qx; p1y = qy; - c1 = regionCode(p1x, p1y); - } - else { + c1 = regionCode(p1x, p1y); + } else { p2x = qx; p2y = qy; c2 = regionCode(p2x, p2y); @@ -137,4 +132,4 @@ public final class CohenSutherlandClipping return true; } } -// end of file \ No newline at end of file +// end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java index 5b9c3b6c..909de599 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java @@ -10,11 +10,6 @@ import java.util.List; */ public final class QuickSort { - private QuickSort() { - - } - - private static final Comparator OBJCOMP = new Comparator() { @Override public int compare(Comparable object1, Comparable object2) { @@ -24,6 +19,10 @@ public final class QuickSort { }; + private QuickSort() { + + } + /** * Sorts the given list using the given comparator. * diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java index 62f72434..2a95ec3b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java @@ -1,11 +1,11 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.utils; +import lombok.extern.slf4j.Slf4j; + import java.math.BigDecimal; import java.util.Comparator; import java.util.List; -import lombok.extern.slf4j.Slf4j; - @Slf4j @SuppressWarnings("all") public class Utils { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java index 43e2cf13..06ccb399 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java @@ -1,15 +1,5 @@ package com.iqser.red.service.redaction.v1.server.visualization.service; -import java.awt.Color; -import java.io.IOException; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageContentStream; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.springframework.stereotype.Service; - import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph; @@ -17,9 +7,17 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.springframework.stereotype.Service; + +import java.awt.Color; +import java.io.IOException; +import java.util.List; @Slf4j @Service @@ -34,7 +32,7 @@ public class PdfVisualisationService { PDPage pdPage = document.getPage(page - 1); PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true); - for(Paragraph paragraph : classifiedDoc.getParagraphs()) { + for (Paragraph paragraph : classifiedDoc.getParagraphs()) { for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) { @@ -44,10 +42,10 @@ public class PdfVisualisationService { continue; } if (textBlock instanceof TextBlock) { - textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size()); + textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size()); visualizeTextBlock((TextBlock) textBlock, contentStream); } else if (textBlock instanceof Table) { - textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size()); + textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size()); visualizeTable((Table) textBlock, contentStream); } @@ -59,7 +57,6 @@ public class PdfVisualisationService { } - public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException { for (int page = 1; page <= document.getNumberOfPages(); page++) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml index 302d198a..e19b1e65 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml @@ -1,4 +1,11 @@ server: port: 8083 -configuration-service.url: "http://localhost:8081" \ No newline at end of file +configuration-service.url: "http://localhost:8081" + + +storage: + bucket-name: 'redaction' + endpoint: 'http://localhost:9000' + key: minioadmin + secret: minioadmin diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml index efb01d6f..15ff3651 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml @@ -17,4 +17,11 @@ management: prometheus.enabled: ${monitoring.enabled:false} health.enabled: true endpoints.web.exposure.include: prometheus, health - metrics.export.prometheus.enabled: ${monitoring.enabled:false} \ No newline at end of file + metrics.export.prometheus.enabled: ${monitoring.enabled:false} + + +storage: + signer-type: 'AWSS3V4SignerType' + bucket-name: 'redaction' + region: 'us-east-1' + endpoint: 'https://s3.amazonaws.com' diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java new file mode 100644 index 00000000..e73acb8b --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java @@ -0,0 +1,34 @@ +package com.iqser.red.service.redaction.v1.server; + +import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist; +import com.iqser.red.storage.commons.service.StorageService; +import org.springframework.core.io.InputStreamResource; + +import java.io.ByteArrayInputStream; +import java.util.HashMap; +import java.util.Map; + +public class InMemoryStorageService extends StorageService { + + private Map dataMap = new HashMap<>(); + + public InMemoryStorageService() { + super(null, null); + } + + @Override + public InputStreamResource getObject(String objectId) { + + var res = dataMap.get(objectId); + if (res == null) { + throw new StorageObjectDoesNotExist(new RuntimeException()); + } + return new InputStreamResource(new ByteArrayInputStream(res)); + + } + + @Override + public void storeObject(String objectId, byte[] data) { + dataMap.put(objectId, data); + } +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 0c459750..75384b34 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -1,28 +1,20 @@ package com.iqser.red.service.redaction.v1.server; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.when; -import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.time.OffsetDateTime; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.stream.Collectors; - +import com.amazonaws.services.s3.AmazonS3; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.configuration.v1.api.model.*; +import com.iqser.red.service.file.management.v1.api.model.FileType; +import com.iqser.red.service.redaction.v1.model.*; +import com.iqser.red.service.redaction.v1.server.classification.model.SectionText; +import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; +import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; +import com.iqser.red.service.redaction.v1.server.client.RulesClient; +import com.iqser.red.service.redaction.v1.server.controller.RedactionController; +import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; +import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.storage.commons.service.StorageService; +import lombok.SneakyThrows; import org.apache.commons.io.IOUtils; import org.junit.Before; import org.junit.Test; @@ -37,40 +29,20 @@ import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit4.SpringRunner; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.iqser.red.service.configuration.v1.api.model.Colors; -import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; -import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse; -import com.iqser.red.service.configuration.v1.api.model.RulesResponse; -import com.iqser.red.service.configuration.v1.api.model.TypeResponse; -import com.iqser.red.service.configuration.v1.api.model.TypeResult; -import com.iqser.red.service.redaction.v1.model.AnalyzeRequest; -import com.iqser.red.service.redaction.v1.model.AnalyzeResult; -import com.iqser.red.service.redaction.v1.model.AnnotateRequest; -import com.iqser.red.service.redaction.v1.model.AnnotateResponse; -import com.iqser.red.service.redaction.v1.model.Comment; -import com.iqser.red.service.redaction.v1.model.IdRemoval; -import com.iqser.red.service.redaction.v1.model.ManualForceRedact; -import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry; -import com.iqser.red.service.redaction.v1.model.ManualRedactions; -import com.iqser.red.service.redaction.v1.model.Point; -import com.iqser.red.service.redaction.v1.model.ReanalyzeResult; -import com.iqser.red.service.redaction.v1.model.Rectangle; -import com.iqser.red.service.redaction.v1.model.RedactionLogEntry; -import com.iqser.red.service.redaction.v1.model.RedactionRequest; -import com.iqser.red.service.redaction.v1.model.RedactionResult; -import com.iqser.red.service.redaction.v1.model.RenalyzeRequest; -import com.iqser.red.service.redaction.v1.model.SectionText; -import com.iqser.red.service.redaction.v1.model.Status; -import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; -import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient; -import com.iqser.red.service.redaction.v1.server.client.RulesClient; -import com.iqser.red.service.redaction.v1.server.controller.RedactionController; -import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; -import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; +import java.io.*; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.time.OffsetDateTime; +import java.util.*; +import java.util.stream.Collectors; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; +import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT; @RunWith(SpringRunner.class) @SpringBootTest(webEnvironment = RANDOM_PORT) @@ -116,6 +88,15 @@ public class RedactionIntegrationTest { @MockBean private ImageClassificationClient imageClassificationClient; + @Autowired + private RedactionStorageService redactionStorageService; + + @Autowired + private StorageService storageService; + + @MockBean + private AmazonS3 amazonS3; + private final Map> dictionary = new HashMap<>(); private final Map typeColorMap = new HashMap<>(); private final Map hintTypeMap = new HashMap<>(); @@ -126,6 +107,8 @@ public class RedactionIntegrationTest { private final Map reanlysisVersions = new HashMap<>(); private final static String TEST_RULESET_ID = "123"; + private final static String TEST_PROJECT_ID = "123"; + private final static String TEST_FILE_ID = "123"; @TestConfiguration public static class RedactionIntegrationTestConfiguration { @@ -146,6 +129,12 @@ public class RedactionIntegrationTest { return kieServices.newKieContainer(kieModule.getReleaseId()); } + @Bean + @Primary + public StorageService inmemoryStorage() { + return new InMemoryStorageService(); + } + } @@ -464,15 +453,16 @@ public class RedactionIntegrationTest { input.addAll(getPathsRecursively(file)); } for (File path : input) { - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(new FileInputStream(path))) - .build(); + + AnalyzeRequest request = prepareStorage(new FileInputStream((path))); System.out.println("Redacting file : " + path.getName()); AnalyzeResult result = redactionController.analyze(request); Map> duplicates = new HashMap<>(); - result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + + var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID); + + redactionLog.getRedactionLogEntry().forEach(entry -> { duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry); }); @@ -484,13 +474,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L); long rstart = System.currentTimeMillis(); - ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder() - .redactionLog(result.getRedactionLog()) - .document(IOUtils.toByteArray(new FileInputStream(path))) - .manualRedactions(null) - .text(result.getText()) - .ruleSetId(TEST_RULESET_ID) - .build()); + redactionController.reanalyze(request); long rend = System.currentTimeMillis(); System.out.println("reanalysis analysis duration: " + (rend - rstart)); @@ -528,15 +512,14 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf"); - - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); AnalyzeResult result = redactionController.analyze(request); - result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID); + var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID); + + redactionLog.getRedactionLogEntry().forEach(entry -> { if (entry.isImage()) { System.out.println("---->" + entry.getType()); } @@ -547,13 +530,13 @@ public class RedactionIntegrationTest { System.out.println("first analysis duration: " + (end - start)); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) { - fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText())); + fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID))); } int correctFound = 0; loop: - for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) { - for (SectionText sectionText : result.getText().getSectionTexts()) { + for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) { + for (SectionText sectionText : text.getSectionTexts()) { if (redactionLogEntry.isImage()) { correctFound++; continue loop; @@ -569,7 +552,7 @@ public class RedactionIntegrationTest { } } } - assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size()); + assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size()); dictionary.get(AUTHOR).add("properties"); reanlysisVersions.put("properties", 1L); @@ -585,20 +568,14 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE)); start = System.currentTimeMillis(); - ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder() - .redactionLog(result.getRedactionLog()) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .text(result.getText()) - .ruleSetId(TEST_RULESET_ID) - .build()); + AnalyzeResult reanalyzeResult = redactionController.reanalyze(request); end = System.currentTimeMillis(); System.out.println("reanalysis analysis duration: " + (end - start)); AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .redactionLog(reanalyzeResult.getRedactionLog()) - .sectionGrid(result.getSectionGrid()) + .projectId(TEST_PROJECT_ID) + .fileId(TEST_FILE_ID) .build()); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { @@ -613,19 +590,13 @@ public class RedactionIntegrationTest { System.out.println("testTableRedaction"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); - - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); + AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); AnalyzeResult result = redactionController.analyze(request); AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .redactionLog(result.getRedactionLog()) - .sectionGrid(result.getSectionGrid()) + .projectId(TEST_PROJECT_ID) + .fileId(TEST_FILE_ID) .build()); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { @@ -680,12 +651,9 @@ public class RedactionIntegrationTest { // manualRedactions.getEntriesToAdd().add(manualRedactionEntry); - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .manualRedactions(manualRedactions) - .build(); + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + request.setManualRedactions(manualRedactions); AnalyzeResult result = redactionController.analyze(request); manualRedactions.getEntriesToAdd().add(manualRedactionEntry); @@ -694,20 +662,15 @@ public class RedactionIntegrationTest { .status(Status.APPROVED) .build())); - ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder() - .redactionLog(result.getRedactionLog()) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .manualRedactions(manualRedactions) - .text(result.getText()) - .ruleSetId(TEST_RULESET_ID) - .build()); + redactionController.reanalyze(request); + AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .redactionLog(reanalyzeResult.getRedactionLog()) - .sectionGrid(result.getSectionGrid()) + .projectId(TEST_PROJECT_ID) + .fileId(TEST_FILE_ID) .build()); + try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { fileOutputStream.write(annotateResponse.getDocument()); } @@ -724,11 +687,16 @@ public class RedactionIntegrationTest { System.out.println("classificationTest"); ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf"); - RedactionRequest request = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + RedactionRequest redactionRequest = RedactionRequest.builder() + .projectId(request.getProjectId()) + .fileId(request.getFileId()) + .ruleSetId(request.getRuleSetId()) .build(); - RedactionResult result = redactionController.classify(request); + RedactionResult result = redactionController.classify(redactionRequest); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) { fileOutputStream.write(result.getDocument()); @@ -742,11 +710,15 @@ public class RedactionIntegrationTest { System.out.println("sectionsTest"); ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); - RedactionRequest request = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + RedactionRequest redactionRequest = RedactionRequest.builder() + .projectId(request.getProjectId()) + .fileId(request.getFileId()) + .ruleSetId(request.getRuleSetId()) .build(); - RedactionResult result = redactionController.sections(request); + RedactionResult result = redactionController.sections(redactionRequest); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) { fileOutputStream.write(result.getDocument()); @@ -760,11 +732,15 @@ public class RedactionIntegrationTest { System.out.println("htmlTablesTest"); ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); - RedactionRequest request = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + RedactionRequest redactionRequest = RedactionRequest.builder() + .projectId(request.getProjectId()) + .fileId(request.getFileId()) + .ruleSetId(request.getRuleSetId()) .build(); - RedactionResult result = redactionController.htmlTables(request); + RedactionResult result = redactionController.htmlTables(redactionRequest); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) { fileOutputStream.write(result.getDocument()); @@ -778,11 +754,15 @@ public class RedactionIntegrationTest { System.out.println("htmlTableRotationTest"); ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); - RedactionRequest request = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + RedactionRequest redactionRequest = RedactionRequest.builder() + .projectId(request.getProjectId()) + .fileId(request.getFileId()) + .ruleSetId(request.getRuleSetId()) .build(); - RedactionResult result = redactionController.htmlTables(request); + RedactionResult result = redactionController.htmlTables(redactionRequest); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) { fileOutputStream.write(result.getDocument()); @@ -795,20 +775,45 @@ public class RedactionIntegrationTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf"); - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); AnalyzeResult result = redactionController.analyze(request); - result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { + var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID); + + redactionLog.getRedactionLogEntry().forEach(entry -> { if (!entry.isHint()) { assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study"); } }); } + @SneakyThrows + private AnalyzeRequest prepareStorage(String file) { + ClassPathResource pdfFileResource = new ClassPathResource(file); + + return prepareStorage(pdfFileResource.getInputStream()); + } + + + @SneakyThrows + private AnalyzeRequest prepareStorage(InputStream stream) { + + AnalyzeRequest request = AnalyzeRequest.builder() + .ruleSetId(TEST_RULESET_ID) + .projectId(TEST_PROJECT_ID) + .fileId(TEST_FILE_ID) + .lastProcessed(OffsetDateTime.now()) + .build(); + + var bytes = IOUtils.toByteArray(stream); + + storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), bytes); + + return request; + + } + @Test public void sponsorCompanyTest() throws IOException { @@ -816,17 +821,14 @@ public class RedactionIntegrationTest { long start = System.currentTimeMillis(); ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf"); - AnalyzeRequest request = AnalyzeRequest.builder() - .ruleSetId(TEST_RULESET_ID) - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); + + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); AnalyzeResult result = redactionController.analyze(request); AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .redactionLog(result.getRedactionLog()) - .sectionGrid(result.getSectionGrid()) + .projectId(TEST_PROJECT_ID) + .fileId(TEST_FILE_ID) .build()); try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) { @@ -857,4 +859,4 @@ public class RedactionIntegrationTest { } } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index b7efed93..95c8ebf8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -1,12 +1,8 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; -import com.iqser.red.service.configuration.v1.api.model.Colors; -import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry; -import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse; -import com.iqser.red.service.configuration.v1.api.model.RulesResponse; -import com.iqser.red.service.configuration.v1.api.model.TypeResponse; -import com.iqser.red.service.configuration.v1.api.model.TypeResult; -import com.iqser.red.service.redaction.v1.model.RedactionRequest; +import com.amazonaws.services.s3.AmazonS3; +import com.iqser.red.service.configuration.v1.api.model.*; +import com.iqser.red.service.redaction.v1.server.InMemoryStorageService; import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.client.DictionaryClient; import com.iqser.red.service.redaction.v1.server.client.RulesClient; @@ -14,7 +10,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils; import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader; import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService; -import org.apache.commons.io.IOUtils; +import com.iqser.red.storage.commons.service.StorageService; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Before; import org.junit.Ignore; @@ -30,6 +26,7 @@ import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit4.SpringRunner; @@ -40,15 +37,8 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.concurrent.atomic.AtomicLong; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.when; @@ -80,6 +70,9 @@ public class EntityRedactionServiceTest { @Autowired private DroolsExecutionService droolsExecutionService; + @MockBean + private AmazonS3 amazonS3; + private final static String TEST_RULESET_ID = "123"; @TestConfiguration @@ -101,6 +94,13 @@ public class EntityRedactionServiceTest { return kieServices.newKieContainer(kieModule.getReleaseId()); } + + @Bean + @Primary + public StorageService inmemoryStorage() { + return new InMemoryStorageService(); + } + } @@ -125,10 +125,6 @@ public class EntityRedactionServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); - RedactionRequest redactionRequest = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); - DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))) .build(); @@ -144,7 +140,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) { + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page @@ -158,10 +154,6 @@ public class EntityRedactionServiceTest { ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf"); - RedactionRequest redactionRequest = RedactionRequest.builder() - .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) - .build(); - DictionaryResponse dictionaryResponse = DictionaryResponse.builder() .entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H."))) .build(); @@ -176,7 +168,7 @@ public class EntityRedactionServiceTest { .build(); when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse); - try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) { + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page @@ -526,4 +518,4 @@ public class EntityRedactionServiceTest { return dictionaryEntries; } -} \ No newline at end of file +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java index 4f58b26d..4d83412a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java @@ -1,16 +1,14 @@ package com.iqser.red.service.redaction.v1.server.segmentation; -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.ByteArrayOutputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; - -import javax.imageio.ImageIO; - +import com.amazonaws.services.s3.AmazonS3; +import com.iqser.red.service.redaction.v1.server.classification.model.Document; +import com.iqser.red.service.redaction.v1.server.classification.model.Page; +import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; +import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; +import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Ignore; import org.junit.Test; @@ -22,15 +20,15 @@ import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit4.SpringRunner; -import com.iqser.red.service.redaction.v1.server.classification.model.Document; -import com.iqser.red.service.redaction.v1.server.classification.model.Page; -import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService; -import com.iqser.red.service.redaction.v1.server.redaction.model.Image; -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; -import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; -import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; +import javax.imageio.ImageIO; +import java.io.ByteArrayOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.Assertions.assertThat; @SpringBootTest @RunWith(SpringRunner.class) @@ -51,6 +49,8 @@ public class PdfSegmentationServiceTest { @MockBean private KieContainer kieContainer; + @MockBean + private AmazonS3 amazonS3; @Test @Ignore @@ -76,6 +76,29 @@ public class PdfSegmentationServiceTest { } + @Test + public void testPDFSegmentationWithComplexTable() throws IOException { + + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf"); + + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { + Document document = pdfSegmentationService.parseDocument(pdDocument); + assertThat(document.getParagraphs() + .stream() + .flatMap(paragraph -> paragraph.getTables().stream()) + .collect(Collectors.toList())).isNotEmpty(); + Table table = document.getParagraphs() + .stream() + .flatMap(paragraph -> paragraph.getTables().stream()) + .collect(Collectors.toList()) + .get(0); + assertThat(table.getColCount()).isEqualTo(6); + assertThat(table.getRowCount()).isEqualTo(13); + assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13); + } + } + + @Test public void testTableExtraction() throws IOException {