Reworked re-analysis and analysis to use memory model / directly read/store files, and dumped pd doc wherever possible
This commit is contained in:
parent
ed59f36220
commit
5cb4ea287c
@ -5,7 +5,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>platform-dependency</artifactId>
|
<artifactId>platform-dependency</artifactId>
|
||||||
<groupId>com.iqser.red</groupId>
|
<groupId>com.iqser.red</groupId>
|
||||||
<version>1.0.8</version>
|
<version>1.1.2</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
@ -32,7 +32,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.iqser.red</groupId>
|
<groupId>com.iqser.red</groupId>
|
||||||
<artifactId>platform-commons-dependency</artifactId>
|
<artifactId>platform-commons-dependency</artifactId>
|
||||||
<version>1.2.5</version>
|
<version>1.2.9</version>
|
||||||
<scope>import</scope>
|
<scope>import</scope>
|
||||||
<type>pom</type>
|
<type>pom</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
@ -52,4 +52,4 @@
|
|||||||
|
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@ -5,13 +5,19 @@ import lombok.Builder;
|
|||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class AnalyzeRequest {
|
public class AnalyzeRequest {
|
||||||
|
|
||||||
private byte[] document;
|
private String projectId;
|
||||||
|
private String fileId;
|
||||||
private String ruleSetId;
|
private String ruleSetId;
|
||||||
private ManualRedactions manualRedactions;
|
private ManualRedactions manualRedactions;
|
||||||
|
private OffsetDateTime lastProcessed;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -12,8 +12,11 @@ import lombok.NoArgsConstructor;
|
|||||||
public class AnalyzeResult {
|
public class AnalyzeResult {
|
||||||
|
|
||||||
private int numberOfPages;
|
private int numberOfPages;
|
||||||
private RedactionLog redactionLog;
|
private boolean hasHints;
|
||||||
private SectionGrid sectionGrid;
|
private boolean hasRequests;
|
||||||
private Text text;
|
private boolean hasRedactions;
|
||||||
|
private boolean hasImages;
|
||||||
|
private boolean hasUpdates;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,6 @@ import lombok.NoArgsConstructor;
|
|||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class AnnotateRequest {
|
public class AnnotateRequest {
|
||||||
|
|
||||||
private byte[] document;
|
private String projectId;
|
||||||
private RedactionLog redactionLog;
|
private String fileId;
|
||||||
private SectionGrid sectionGrid;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
|||||||
@ -1,16 +1,16 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Builder;
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
|
|||||||
@ -11,7 +11,8 @@ import lombok.NoArgsConstructor;
|
|||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class RedactionRequest {
|
public class RedactionRequest {
|
||||||
|
|
||||||
private byte[] document;
|
private String projectId;
|
||||||
|
private String fileId;
|
||||||
private String ruleSetId;
|
private String ruleSetId;
|
||||||
private ManualRedactions manualRedactions;
|
private ManualRedactions manualRedactions;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,7 +13,5 @@ public class RedactionResult {
|
|||||||
|
|
||||||
private byte[] document;
|
private byte[] document;
|
||||||
private int numberOfPages;
|
private int numberOfPages;
|
||||||
private RedactionLog redactionLog;
|
|
||||||
private SectionGrid sectionGrid;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,22 +0,0 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
|
||||||
|
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Builder;
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
|
||||||
@Builder
|
|
||||||
@NoArgsConstructor
|
|
||||||
@AllArgsConstructor
|
|
||||||
public class RenalyzeRequest {
|
|
||||||
|
|
||||||
private byte[] document;
|
|
||||||
private String ruleSetId;
|
|
||||||
private ManualRedactions manualRedactions;
|
|
||||||
private Text text;
|
|
||||||
private RedactionLog redactionLog;
|
|
||||||
private OffsetDateTime lastProcessed;
|
|
||||||
}
|
|
||||||
@ -27,7 +27,7 @@ public class SectionArea {
|
|||||||
private String header;
|
private String header;
|
||||||
|
|
||||||
public boolean contains(Rectangle other) {
|
public boolean contains(Rectangle other) {
|
||||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
|
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
|
|||||||
@ -1,25 +0,0 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Builder;
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
|
||||||
@Builder
|
|
||||||
@NoArgsConstructor
|
|
||||||
@AllArgsConstructor
|
|
||||||
public class SectionText {
|
|
||||||
|
|
||||||
private int sectionNumber;
|
|
||||||
private String text;
|
|
||||||
|
|
||||||
private boolean isTable;
|
|
||||||
private String headline;
|
|
||||||
|
|
||||||
private List<SectionArea> sectionAreas = new ArrayList<>();
|
|
||||||
|
|
||||||
}
|
|
||||||
@ -1,14 +1,6 @@
|
|||||||
package com.iqser.red.service.redaction.v1.resources;
|
package com.iqser.red.service.redaction.v1.resources;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
|
||||||
|
|
||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
import org.springframework.web.bind.annotation.PathVariable;
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
import org.springframework.web.bind.annotation.PostMapping;
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
@ -25,7 +17,7 @@ public interface RedactionResource {
|
|||||||
AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest);
|
AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest);
|
||||||
|
|
||||||
@PostMapping(value = "/reanalyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/reanalyze", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||||
ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest);
|
AnalyzeResult reanalyze(@RequestBody AnalyzeRequest renalyzeRequest);
|
||||||
|
|
||||||
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||||
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
|
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
|
||||||
@ -39,10 +31,10 @@ public interface RedactionResource {
|
|||||||
@PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||||
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
|
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
|
||||||
|
|
||||||
@PostMapping(value = "/rules/update"+RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/rules/update" + RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||||
void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId);
|
void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId);
|
||||||
|
|
||||||
@PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||||
void testRules(@RequestBody String rules);
|
void testRules(@RequestBody String rules);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,6 +12,10 @@
|
|||||||
<artifactId>redaction-service-server-v1</artifactId>
|
<artifactId>redaction-service-server-v1</artifactId>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.iqser.red.commons</groupId>
|
||||||
|
<artifactId>storage-commons</artifactId>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.iqser.red.service</groupId>
|
<groupId>com.iqser.red.service</groupId>
|
||||||
<artifactId>redaction-service-api-v1</artifactId>
|
<artifactId>redaction-service-api-v1</artifactId>
|
||||||
@ -20,7 +24,12 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.iqser.red.service</groupId>
|
<groupId>com.iqser.red.service</groupId>
|
||||||
<artifactId>configuration-service-api-v1</artifactId>
|
<artifactId>configuration-service-api-v1</artifactId>
|
||||||
<version>2.2.9</version>
|
<version>2.5.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.iqser.red.service</groupId>
|
||||||
|
<artifactId>file-management-service-api-v1</artifactId>
|
||||||
|
<version>2.6.7</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.drools</groupId>
|
<groupId>org.drools</groupId>
|
||||||
|
|||||||
@ -1,5 +1,8 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server;
|
package com.iqser.red.service.redaction.v1.server;
|
||||||
|
|
||||||
|
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
@ -8,10 +11,6 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties
|
|||||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
|
|
||||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
|
||||||
|
|
||||||
@Import({DefaultWebMvcConfiguration.class})
|
@Import({DefaultWebMvcConfiguration.class})
|
||||||
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
@EnableFeignClients(basePackageClasses = RulesClient.class)
|
||||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||||
@ -23,4 +22,4 @@ public class Application {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,20 +1,18 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
|
||||||
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
public class Document {
|
public class Document {
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -7,38 +9,35 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import lombok.Getter;
|
public class FloatFrequencyCounter {
|
||||||
|
|
||||||
public class FloatFrequencyCounter
|
|
||||||
{
|
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
Map<Float, Integer> countPerValue = new HashMap<>();
|
Map<Float, Integer> countPerValue = new HashMap<>();
|
||||||
|
|
||||||
public void add(float value){
|
public void add(float value) {
|
||||||
if(!countPerValue.containsKey(value)){
|
if (!countPerValue.containsKey(value)) {
|
||||||
countPerValue.put(value, 1);
|
countPerValue.put(value, 1);
|
||||||
} else {
|
} else {
|
||||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addAll(Map<Float, Integer> otherCounter){
|
public void addAll(Map<Float, Integer> otherCounter) {
|
||||||
for(Map.Entry<Float, Integer> entry: otherCounter.entrySet()){
|
for (Map.Entry<Float, Integer> entry : otherCounter.entrySet()) {
|
||||||
if(countPerValue.containsKey(entry.getKey())){
|
if (countPerValue.containsKey(entry.getKey())) {
|
||||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue());
|
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
|
||||||
} else {
|
} else {
|
||||||
countPerValue.put(entry.getKey(), entry.getValue());
|
countPerValue.put(entry.getKey(), entry.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Float getMostPopular(){
|
public Float getMostPopular() {
|
||||||
Map.Entry<Float, Integer> mostPopular = null;
|
Map.Entry<Float, Integer> mostPopular = null;
|
||||||
for(Map.Entry<Float, Integer> entry: countPerValue.entrySet()){
|
for (Map.Entry<Float, Integer> entry : countPerValue.entrySet()) {
|
||||||
if(mostPopular == null){
|
if (mostPopular == null) {
|
||||||
mostPopular = entry;
|
mostPopular = entry;
|
||||||
} else if(entry.getValue() >= mostPopular.getValue()){
|
} else if (entry.getValue() >= mostPopular.getValue()) {
|
||||||
mostPopular = entry;
|
mostPopular = entry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -46,12 +45,11 @@ public class FloatFrequencyCounter
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<Float> getHighterThanMostPopular() {
|
||||||
public List<Float> getHighterThanMostPopular(){
|
|
||||||
Float mostPopular = getMostPopular();
|
Float mostPopular = getMostPopular();
|
||||||
List<Float> higher = new ArrayList<>();
|
List<Float> higher = new ArrayList<>();
|
||||||
for(Float value: countPerValue.keySet()){
|
for (Float value : countPerValue.keySet()) {
|
||||||
if(value > mostPopular){
|
if (value > mostPopular) {
|
||||||
higher.add(value);
|
higher.add(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -60,12 +58,12 @@ public class FloatFrequencyCounter
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Float getHighest(){
|
public Float getHighest() {
|
||||||
Float highest = null;
|
Float highest = null;
|
||||||
for(Float value: countPerValue.keySet()){
|
for (Float value : countPerValue.keySet()) {
|
||||||
if (highest == null){
|
if (highest == null) {
|
||||||
highest = value;
|
highest = value;
|
||||||
} else if(value > highest){
|
} else if (value > highest) {
|
||||||
highest = value;
|
highest = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class Footer {
|
public class Footer {
|
||||||
@ -21,4 +20,4 @@ public class Footer {
|
|||||||
return searchableText;
|
return searchableText;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class Header {
|
public class Header {
|
||||||
@ -21,4 +20,4 @@ public class Header {
|
|||||||
return searchableText;
|
return searchableText;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,15 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class Page {
|
public class Page {
|
||||||
@ -37,4 +36,4 @@ public class Page {
|
|||||||
return rotation != 0;
|
return rotation != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,19 +1,18 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
public class Paragraph implements Comparable{
|
public class Paragraph implements Comparable {
|
||||||
|
|
||||||
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
|
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
|
||||||
private List<PdfImage> images = new ArrayList<>();
|
private List<PdfImage> images = new ArrayList<>();
|
||||||
@ -62,4 +61,4 @@ public class Paragraph implements Comparable{
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,45 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class SectionText {
|
||||||
|
|
||||||
|
private int sectionNumber;
|
||||||
|
private String text;
|
||||||
|
|
||||||
|
private boolean isTable;
|
||||||
|
private String headline;
|
||||||
|
|
||||||
|
private List<SectionArea> sectionAreas = new ArrayList<>();
|
||||||
|
private Set<Image> images = new HashSet<>();
|
||||||
|
|
||||||
|
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||||
|
private Map<String, CellValue> tabularData = new HashMap<>();
|
||||||
|
private List<Integer> cellStarts = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
|
public SearchableText getSearchableText() {
|
||||||
|
|
||||||
|
SearchableText searchableText = new SearchableText();
|
||||||
|
textBlocks.forEach(block -> {
|
||||||
|
if (block != null) {
|
||||||
|
searchableText.addAll(block.getSequences());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return searchableText;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,10 +1,10 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import lombok.Getter;
|
|
||||||
|
|
||||||
public class StringFrequencyCounter {
|
public class StringFrequencyCounter {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@ -46,4 +46,4 @@ public class StringFrequencyCounter {
|
|||||||
return mostPopular != null ? mostPopular.getKey() : null;
|
return mostPopular != null ? mostPopular.getKey() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,17 +1,18 @@
|
|||||||
package com.iqser.red.service.redaction.v1.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class Text {
|
public class Text {
|
||||||
|
|
||||||
|
private int numberOfPages;
|
||||||
private List<SectionText> sectionTexts = new ArrayList<>();
|
private List<SectionText> sectionTexts = new ArrayList<>();
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1,16 +1,15 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Builder
|
@Builder
|
||||||
@Data
|
@Data
|
||||||
@ -98,7 +97,6 @@ public class TextBlock extends AbstractTextContainer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
@ -139,4 +137,4 @@ public class TextBlock extends AbstractTextContainer {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class UnclassifiedText {
|
public class UnclassifiedText {
|
||||||
@ -21,4 +20,4 @@ public class UnclassifiedText {
|
|||||||
return searchableText;
|
return searchableText;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,21 +1,20 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
|
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
|
|||||||
@ -1,19 +1,17 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
|
|||||||
@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.classification.utils;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
|
|||||||
@ -1,16 +1,16 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.client;
|
package com.iqser.red.service.redaction.v1.server.client;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
import org.springframework.lang.NonNull;
|
import org.springframework.lang.NonNull;
|
||||||
import org.springframework.lang.Nullable;
|
import org.springframework.lang.Nullable;
|
||||||
import org.springframework.util.Assert;
|
import org.springframework.util.Assert;
|
||||||
import org.springframework.util.FileCopyUtils;
|
import org.springframework.util.FileCopyUtils;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
public class MockMultipartFile implements MultipartFile {
|
public class MockMultipartFile implements MultipartFile {
|
||||||
|
|
||||||
private final String name;
|
private final String name;
|
||||||
@ -22,13 +22,13 @@ public class MockMultipartFile implements MultipartFile {
|
|||||||
|
|
||||||
public MockMultipartFile(String name, @Nullable byte[] content) {
|
public MockMultipartFile(String name, @Nullable byte[] content) {
|
||||||
|
|
||||||
this(name, "", (String) null, (byte[]) content);
|
this(name, "", null, content);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public MockMultipartFile(String name, InputStream contentStream) throws IOException {
|
public MockMultipartFile(String name, InputStream contentStream) throws IOException {
|
||||||
|
|
||||||
this(name, "", (String) null, (byte[]) FileCopyUtils.copyToByteArray(contentStream));
|
this(name, "", null, FileCopyUtils.copyToByteArray(contentStream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ public class MockMultipartFile implements MultipartFile {
|
|||||||
|
|
||||||
public long getSize() {
|
public long getSize() {
|
||||||
|
|
||||||
return (long) this.content.length;
|
return this.content.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +1,15 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.controller;
|
package com.iqser.red.service.redaction.v1.server.controller;
|
||||||
|
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
|
|
||||||
import com.iqser.red.commons.spring.ErrorMessage;
|
import com.iqser.red.commons.spring.ErrorMessage;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||||
import org.springframework.web.bind.annotation.ResponseBody;
|
import org.springframework.web.bind.annotation.ResponseBody;
|
||||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||||
import org.springframework.web.bind.annotation.RestControllerAdvice;
|
import org.springframework.web.bind.annotation.RestControllerAdvice;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
import java.time.OffsetDateTime;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@RestControllerAdvice
|
@RestControllerAdvice
|
||||||
@ -38,4 +36,4 @@ public class ControllerAdvice {
|
|||||||
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
|
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,17 +1,7 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.controller;
|
package com.iqser.red.service.redaction.v1.server.controller;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Text;
|
|
||||||
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
@ -19,27 +9,21 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
|||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.springframework.web.bind.annotation.PathVariable;
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
import org.springframework.web.bind.annotation.RequestBody;
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@RestController
|
@RestController
|
||||||
@ -47,61 +31,36 @@ import java.util.List;
|
|||||||
public class RedactionController implements RedactionResource {
|
public class RedactionController implements RedactionResource {
|
||||||
|
|
||||||
private final PdfVisualisationService pdfVisualisationService;
|
private final PdfVisualisationService pdfVisualisationService;
|
||||||
private final PdfSegmentationService pdfSegmentationService;
|
|
||||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
|
||||||
private final EntityRedactionService entityRedactionService;
|
|
||||||
private final DroolsExecutionService droolsExecutionService;
|
private final DroolsExecutionService droolsExecutionService;
|
||||||
private final DictionaryService dictionaryService;
|
private final DictionaryService dictionaryService;
|
||||||
private final AnnotationService annotationService;
|
private final AnnotationService annotationService;
|
||||||
private final ReanalyzeService reanalyzeService;
|
private final ReanalyzeService reanalyzeService;
|
||||||
private final ImageClassificationService imageClassificationService;
|
private final PdfSegmentationService pdfSegmentationService;
|
||||||
|
private final RedactionStorageService redactionStorageService;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
public AnalyzeResult analyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||||
|
return reanalyzeService.analyze(analyzeRequest);
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(analyzeRequest.getDocument()))) {
|
|
||||||
pdDocument.setAllSecurityToBeRemoved(true);
|
|
||||||
|
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
|
||||||
|
|
||||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
|
||||||
|
|
||||||
imageClassificationService.classifyImages(classifiedDoc);
|
|
||||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
|
|
||||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
|
|
||||||
.getRuleSetId());
|
|
||||||
|
|
||||||
log.info("Redaction analysis successful...");
|
|
||||||
|
|
||||||
return AnalyzeResult.builder()
|
|
||||||
.sectionGrid(classifiedDoc.getSectionGrid())
|
|
||||||
.redactionLog(new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
|
|
||||||
.getRulesVersion(), analyzeRequest.getRuleSetId()))
|
|
||||||
.numberOfPages(classifiedDoc.getPages().size())
|
|
||||||
.text(new Text(classifiedDoc.getSectionText()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new RedactionException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
|
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
|
||||||
|
return reanalyzeService.reanalyze(analyzeRequest);
|
||||||
return reanalyzeService.reanalyze(renalyzeRequest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
|
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(annotateRequest.getDocument()))) {
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getProjectId(), annotateRequest.getFileId(), FileType.ORIGIN));
|
||||||
|
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId());
|
||||||
|
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId());
|
||||||
|
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
|
|
||||||
pdDocument.setAllSecurityToBeRemoved(true);
|
pdDocument.setAllSecurityToBeRemoved(true);
|
||||||
dictionaryService.updateDictionary(annotateRequest.getRedactionLog().getRuleSetId());
|
dictionaryService.updateDictionary(redactionLog.getRuleSetId());
|
||||||
annotationService.annotate(pdDocument, annotateRequest.getRedactionLog(), annotateRequest.getSectionGrid());
|
annotationService.annotate(pdDocument, redactionLog, sectionsGrid);
|
||||||
|
|
||||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||||
pdDocument.save(byteArrayOutputStream);
|
pdDocument.save(byteArrayOutputStream);
|
||||||
@ -115,15 +74,16 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
|
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
|
||||||
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfSegmentationRequest.getDocument()))) {
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
pdDocument.setAllSecurityToBeRemoved(true);
|
pdDocument.setAllSecurityToBeRemoved(true);
|
||||||
|
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument);
|
pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument);
|
||||||
|
|
||||||
return convert(pdDocument, classifiedDoc.getPages().size(), pdfSegmentationRequest.getRuleSetId());
|
return convert(pdDocument, classifiedDoc.getPages().size());
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
@ -134,14 +94,15 @@ public class RedactionController implements RedactionResource {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
||||||
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
pdDocument.setAllSecurityToBeRemoved(true);
|
pdDocument.setAllSecurityToBeRemoved(true);
|
||||||
|
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
pdfVisualisationService.visualizeParagraphs(classifiedDoc, pdDocument);
|
pdfVisualisationService.visualizeParagraphs(classifiedDoc, pdDocument);
|
||||||
|
|
||||||
return convert(pdDocument, classifiedDoc.getPages().size(), redactionRequest.getRuleSetId());
|
return convert(pdDocument, classifiedDoc.getPages().size());
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
@ -153,27 +114,29 @@ public class RedactionController implements RedactionResource {
|
|||||||
@Override
|
@Override
|
||||||
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
|
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||||
|
|
||||||
|
Document classifiedDoc;
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
pdDocument.setAllSecurityToBeRemoved(true);
|
pdDocument.setAllSecurityToBeRemoved(true);
|
||||||
|
classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
} catch (Exception e) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (Page page : classifiedDoc.getPages()) {
|
|
||||||
for (AbstractTextContainer textContainer : page.getTextBlocks()) {
|
|
||||||
if (textContainer instanceof Table) {
|
|
||||||
Table table = (Table) textContainer;
|
|
||||||
sb.append(table.getTextAsHtml()).append("<br />").append("<br />");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return RedactionResult.builder().document(sb.toString().getBytes()).build();
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (Page page : classifiedDoc.getPages()) {
|
||||||
|
for (AbstractTextContainer textContainer : page.getTextBlocks()) {
|
||||||
|
if (textContainer instanceof Table) {
|
||||||
|
Table table = (Table) textContainer;
|
||||||
|
sb.append(table.getTextAsHtml()).append("<br />").append("<br />");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return RedactionResult.builder().document(sb.toString().getBytes()).build();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -191,23 +154,13 @@ public class RedactionController implements RedactionResource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private RedactionResult convert(PDDocument document, int numberOfPages, String ruleSetId) throws IOException {
|
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
||||||
|
|
||||||
return convert(document, numberOfPages, null, null, 0, 0, ruleSetId);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private RedactionResult convert(PDDocument document, int numberOfPages,
|
|
||||||
List<RedactionLogEntry> redactionLogEntities, SectionGrid sectionGrid,
|
|
||||||
long dictionaryVersion, long rulesVersion, String ruleSetId) throws IOException {
|
|
||||||
|
|
||||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||||
document.save(byteArrayOutputStream);
|
document.save(byteArrayOutputStream);
|
||||||
return RedactionResult.builder()
|
return RedactionResult.builder()
|
||||||
.document(byteArrayOutputStream.toByteArray())
|
.document(byteArrayOutputStream.toByteArray())
|
||||||
.numberOfPages(numberOfPages)
|
.numberOfPages(numberOfPages)
|
||||||
.redactionLog(new RedactionLog(redactionLogEntities, dictionaryVersion, rulesVersion, ruleSetId))
|
|
||||||
.sectionGrid(sectionGrid)
|
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +1,15 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.apache.pdfbox.text.PDFTextStripperByArea;
|
||||||
|
import org.apache.pdfbox.text.TextPosition;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.pdfbox.text.PDFTextStripperByArea;
|
|
||||||
import org.apache.pdfbox.text.TextPosition;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
|
|
||||||
import lombok.Getter;
|
|
||||||
import lombok.Setter;
|
|
||||||
|
|
||||||
public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@ -76,7 +74,7 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void clearPositions(){
|
public void clearPositions() {
|
||||||
textPositionSequences = new ArrayList<>();
|
textPositionSequences = new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,33 +1,15 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||||
|
|
||||||
import java.awt.geom.AffineTransform;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import java.awt.geom.Point2D;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import java.awt.geom.Rectangle2D;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||||
import java.io.IOException;
|
import lombok.Getter;
|
||||||
import java.util.ArrayList;
|
import lombok.Setter;
|
||||||
import java.util.List;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
|
import org.apache.pdfbox.contentstream.operator.color.*;
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
|
import org.apache.pdfbox.contentstream.operator.state.*;
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
|
|
||||||
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
||||||
import org.apache.pdfbox.cos.COSBase;
|
import org.apache.pdfbox.cos.COSBase;
|
||||||
import org.apache.pdfbox.cos.COSName;
|
import org.apache.pdfbox.cos.COSName;
|
||||||
@ -40,40 +22,31 @@ import org.apache.pdfbox.text.PDFTextStripper;
|
|||||||
import org.apache.pdfbox.text.TextPosition;
|
import org.apache.pdfbox.text.TextPosition;
|
||||||
import org.apache.pdfbox.util.Matrix;
|
import org.apache.pdfbox.util.Matrix;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import java.awt.geom.AffineTransform;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import java.awt.geom.Point2D;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.io.IOException;
|
||||||
import lombok.Getter;
|
import java.util.ArrayList;
|
||||||
import lombok.Setter;
|
import java.util.List;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class PDFLinesTextStripper extends PDFTextStripper {
|
public class PDFLinesTextStripper extends PDFTextStripper {
|
||||||
|
|
||||||
@Setter
|
|
||||||
protected PDPage pdpage;
|
|
||||||
|
|
||||||
@Getter
|
|
||||||
private int minCharWidth;
|
|
||||||
|
|
||||||
@Getter
|
|
||||||
private int maxCharWidth;
|
|
||||||
|
|
||||||
@Getter
|
|
||||||
private int minCharHeight;
|
|
||||||
|
|
||||||
@Getter
|
|
||||||
private int maxCharHeight;
|
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
private final List<Ruling> rulings = new ArrayList<>();
|
private final List<Ruling> rulings = new ArrayList<>();
|
||||||
|
|
||||||
private final List<Ruling> graphicsPath = new ArrayList<>();
|
private final List<Ruling> graphicsPath = new ArrayList<>();
|
||||||
|
@Setter
|
||||||
|
protected PDPage pdpage;
|
||||||
|
@Getter
|
||||||
|
private int minCharWidth;
|
||||||
|
@Getter
|
||||||
|
private int maxCharWidth;
|
||||||
|
@Getter
|
||||||
|
private int minCharHeight;
|
||||||
|
@Getter
|
||||||
|
private int maxCharHeight;
|
||||||
@Getter
|
@Getter
|
||||||
private List<PdfImage> images = new ArrayList<>();
|
private List<PdfImage> images = new ArrayList<>();
|
||||||
|
|
||||||
@ -369,4 +342,4 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,23 +1,20 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.parsing.model;
|
package com.iqser.red.service.redaction.v1.server.parsing.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.pdfbox.text.TextPosition;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Point;
|
import com.iqser.red.service.redaction.v1.model.Point;
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.apache.pdfbox.text.TextPosition;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class TextPositionSequence implements CharSequence {
|
public class TextPositionSequence implements CharSequence {
|
||||||
|
|
||||||
private List<TextPosition> textPositions = new ArrayList<>();
|
|
||||||
|
|
||||||
private final int page;
|
private final int page;
|
||||||
|
private List<TextPosition> textPositions = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
public TextPositionSequence(List<TextPosition> textPositions, int page) {
|
public TextPositionSequence(List<TextPosition> textPositions, int page) {
|
||||||
@ -223,4 +220,4 @@ public class TextPositionSequence implements CharSequence {
|
|||||||
return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page);
|
return new Rectangle(new Point(posXInit, posYInit), posXEnd - posXInit, posYEnd - posYInit + height, page);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,14 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
|
|
||||||
import lombok.Value;
|
import lombok.Value;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Value
|
@Value
|
||||||
public class CellValue {
|
public class CellValue {
|
||||||
|
|
||||||
@ -47,4 +46,4 @@ public class CellValue {
|
|||||||
.replaceAll(" {2}", " ");
|
.replaceAll(" {2}", " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.Getter;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
public class Dictionary {
|
public class Dictionary {
|
||||||
|
|
||||||
@ -21,15 +21,15 @@ public class Dictionary {
|
|||||||
private long version;
|
private long version;
|
||||||
|
|
||||||
|
|
||||||
/**
 * Creates a dictionary backed by the given models and registers every model in the
 * type lookup map under the key returned by its {@code getType()}.
 *
 * @param dictionaryModels  models stored by reference; a later model with the same type replaces an earlier one in the lookup map
 * @param dictionaryVersion value stored as this dictionary's version
 */
public Dictionary(List<DictionaryModel> dictionaryModels, long dictionaryVersion) {

    this.dictionaryModels = dictionaryModels;
    this.version = dictionaryVersion;

    // Index the models by type for direct access.
    for (DictionaryModel model : this.dictionaryModels) {
        localAccessMap.put(model.getType(), model);
    }
}
|
||||||
|
|
||||||
|
|
||||||
public int getDictionaryRank(String type){
|
public int getDictionaryRank(String type) {
|
||||||
if(!localAccessMap.containsKey(type)){
|
if (!localAccessMap.containsKey(type)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return localAccessMap.get(type).getRank();
|
return localAccessMap.get(type).getRank();
|
||||||
@ -60,7 +60,7 @@ public class Dictionary {
|
|||||||
|
|
||||||
public boolean containsValue(String type, String value) {
|
public boolean containsValue(String type, String value) {
|
||||||
|
|
||||||
if (localAccessMap.containsKey(type) && localAccessMap.get(type)
|
return localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||||
.getEntries()
|
.getEntries()
|
||||||
.contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type)
|
.contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||||
.getLocalEntries()
|
.getLocalEntries()
|
||||||
@ -68,10 +68,7 @@ public class Dictionary {
|
|||||||
.getEntries()
|
.getEntries()
|
||||||
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
.contains(value) || localAccessMap.containsKey(RECOMMENDATION_PREFIX + type) && localAccessMap.get(RECOMMENDATION_PREFIX + type)
|
||||||
.getLocalEntries()
|
.getLocalEntries()
|
||||||
.contains(value)) {
|
.contains(value);
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class DictionaryIncrement {
|
public class DictionaryIncrement {
|
||||||
|
|||||||
@ -1,15 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
|
||||||
|
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Data;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class DictionaryModel implements Serializable {
|
public class DictionaryModel implements Serializable {
|
||||||
@ -23,8 +22,8 @@ public class DictionaryModel implements Serializable {
|
|||||||
private Set<DictionaryEntry> entries;
|
private Set<DictionaryEntry> entries;
|
||||||
private Set<String> localEntries;
|
private Set<String> localEntries;
|
||||||
|
|
||||||
public Set<String> getValues(boolean local){
|
public Set<String> getValues(boolean local) {
|
||||||
return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e-> e.getValue()).collect(Collectors
|
return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
|
||||||
.toSet());
|
.toSet());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -20,5 +20,4 @@ public class DictionaryRepresentation {
|
|||||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,13 +1,12 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.EqualsAndHashCode;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||||
public class Entity {
|
public class Entity {
|
||||||
|
|||||||
@ -1,24 +1,23 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.EqualsAndHashCode;
|
import lombok.EqualsAndHashCode;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@EqualsAndHashCode
|
@EqualsAndHashCode
|
||||||
public class EntityPositionSequence {
|
public class EntityPositionSequence {
|
||||||
|
|
||||||
|
private final String id;
|
||||||
@EqualsAndHashCode.Exclude
|
@EqualsAndHashCode.Exclude
|
||||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||||
private int pageNumber;
|
private int pageNumber;
|
||||||
private final String id;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
|
|||||||
@ -1,14 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
|
||||||
import java.awt.image.BufferedImage;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@ -25,4 +25,4 @@ public class PdfImage {
|
|||||||
@NonNull
|
@NonNull
|
||||||
private int page;
|
private int page;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,37 +0,0 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@Data
|
|
||||||
@NoArgsConstructor
|
|
||||||
public class ReanalysisSection {
|
|
||||||
|
|
||||||
private int sectionNumber;
|
|
||||||
private String headline;
|
|
||||||
private List<TextBlock> textBlocks;
|
|
||||||
private Map<String, CellValue> tabularData = new HashMap<>();
|
|
||||||
private List<Integer> cellStarts;
|
|
||||||
private Set<Image> images = new HashSet<>();
|
|
||||||
|
|
||||||
|
|
||||||
public SearchableText getSearchableText() {
|
|
||||||
|
|
||||||
SearchableText searchableText = new SearchableText();
|
|
||||||
textBlocks.forEach(block -> {
|
|
||||||
if (block instanceof TextBlock) {
|
|
||||||
searchableText.addAll(block.getSequences());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return searchableText;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@ -1,14 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
|
||||||
|
|
||||||
public class SearchableText {
|
public class SearchableText {
|
||||||
|
|
||||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||||
@ -232,4 +232,4 @@ public class SearchableText {
|
|||||||
return sb.append("\n").toString();
|
return sb.append("\n").toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,12 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||||
|
|
||||||
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -11,15 +17,7 @@ import java.util.regex.Matcher;
|
|||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
|
||||||
|
|
||||||
import lombok.Builder;
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
|||||||
@ -0,0 +1,41 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLog;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class AnalyzeResponseService {
|
||||||
|
|
||||||
|
public AnalyzeResult createAnalyzeResponse(int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
|
||||||
|
boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint);
|
||||||
|
|
||||||
|
boolean hasRequests = redactionLog.getRedactionLogEntry()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(entry -> entry.isManual() && entry.getStatus()
|
||||||
|
.equals(com.iqser.red.service.redaction.v1.model.Status.REQUESTED));
|
||||||
|
|
||||||
|
boolean hasRedactions = redactionLog.getRedactionLogEntry()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(entry -> entry.isRedacted() && !entry.isManual() || entry.isManual() && entry.getStatus()
|
||||||
|
.equals(com.iqser.red.service.redaction.v1.model.Status.APPROVED));
|
||||||
|
|
||||||
|
boolean hasImages = redactionLog.getRedactionLogEntry()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(entry -> entry.isHint() && entry.getType().equals("image"));
|
||||||
|
|
||||||
|
boolean hasUpdates = redactionChangeLog != null && redactionChangeLog.getRedactionLogEntry() != null && !redactionChangeLog
|
||||||
|
.getRedactionLogEntry()
|
||||||
|
.isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive"));
|
||||||
|
|
||||||
|
return AnalyzeResult.builder()
|
||||||
|
.numberOfPages(pageCount)
|
||||||
|
.hasHints(hasHints)
|
||||||
|
.hasRedactions(hasRedactions)
|
||||||
|
.hasRequests(hasRequests)
|
||||||
|
.hasImages(hasImages)
|
||||||
|
.hasUpdates(hasUpdates).build();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,14 +1,7 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.awt.Color;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import java.io.IOException;
|
import lombok.RequiredArgsConstructor;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.GregorianCalendar;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||||
@ -21,15 +14,14 @@ import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
|
|||||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
import java.awt.Color;
|
||||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
import java.io.IOException;
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
import java.util.ArrayList;
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
import java.util.GregorianCalendar;
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
import java.util.HashMap;
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
import java.util.List;
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
|||||||
@ -1,20 +1,5 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.awt.Color;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
import org.apache.commons.lang3.SerializationUtils;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
||||||
@ -25,10 +10,16 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncre
|
|||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
|
||||||
|
|
||||||
import feign.FeignException;
|
import feign.FeignException;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@ -37,7 +28,7 @@ public class DictionaryService {
|
|||||||
|
|
||||||
private final DictionaryClient dictionaryClient;
|
private final DictionaryClient dictionaryClient;
|
||||||
|
|
||||||
private Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
|
private final Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
public long updateDictionary(String ruleSetId) {
|
public long updateDictionary(String ruleSetId) {
|
||||||
@ -212,4 +203,4 @@ public class DictionaryService {
|
|||||||
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,11 +1,10 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
||||||
import java.io.InputStream;
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
import java.nio.charset.StandardCharsets;
|
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||||
import java.util.HashMap;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||||
import java.util.Map;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.kie.api.KieServices;
|
import org.kie.api.KieServices;
|
||||||
import org.kie.api.builder.KieBuilder;
|
import org.kie.api.builder.KieBuilder;
|
||||||
@ -15,12 +14,11 @@ import org.kie.api.runtime.KieContainer;
|
|||||||
import org.kie.api.runtime.KieSession;
|
import org.kie.api.runtime.KieSession;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
import java.io.ByteArrayInputStream;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
import java.io.InputStream;
|
||||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
import java.nio.charset.StandardCharsets;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@ -28,9 +26,9 @@ public class DroolsExecutionService {
|
|||||||
|
|
||||||
private final RulesClient rulesClient;
|
private final RulesClient rulesClient;
|
||||||
|
|
||||||
private Map<String, KieContainer> kieContainers = new HashMap<>();
|
private final Map<String, KieContainer> kieContainers = new HashMap<>();
|
||||||
|
|
||||||
private Map<String, Long> rulesVersionPerRuleSetId = new HashMap<>();
|
private final Map<String, Long> rulesVersionPerRuleSetId = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
public KieContainer getKieContainer(String ruleSetId) {
|
public KieContainer getKieContainer(String ruleSetId) {
|
||||||
@ -133,4 +131,4 @@ public class DroolsExecutionService {
|
|||||||
return rulesVersion.longValue();
|
return rulesVersion.longValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,50 +1,27 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.kie.api.runtime.KieContainer;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
||||||
import com.iqser.red.service.redaction.v1.model.Point;
|
import com.iqser.red.service.redaction.v1.model.Point;
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
import com.iqser.red.service.redaction.v1.server.classification.model.*;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.kie.api.runtime.KieContainer;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
|
|||||||
@ -1,21 +1,18 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
|
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
|
|||||||
@ -1,53 +1,29 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||||
import java.io.ByteArrayInputStream;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import java.util.ArrayList;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import java.util.HashMap;
|
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||||
import java.util.HashSet;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||||
import java.util.Iterator;
|
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||||
import java.util.List;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||||
import java.util.Map;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||||
import java.util.Set;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||||
import java.util.stream.Collectors;
|
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||||
import java.util.stream.Stream;
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
|
||||||
import org.kie.api.runtime.KieContainer;
|
import org.kie.api.runtime.KieContainer;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.web.bind.annotation.RequestBody;
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
import java.awt.geom.Rectangle2D;
|
||||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
import java.util.*;
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
import java.util.stream.Collectors;
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
import java.util.stream.Stream;
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ReanalysisSection;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class ReanalyzeService {
|
public class ReanalyzeService {
|
||||||
@ -57,13 +33,63 @@ public class ReanalyzeService {
|
|||||||
private final SurroundingWordsService surroundingWordsService;
|
private final SurroundingWordsService surroundingWordsService;
|
||||||
private final EntityRedactionService entityRedactionService;
|
private final EntityRedactionService entityRedactionService;
|
||||||
private final RedactionLogCreatorService redactionLogCreatorService;
|
private final RedactionLogCreatorService redactionLogCreatorService;
|
||||||
|
private final RedactionStorageService redactionStorageService;
|
||||||
|
private final PdfSegmentationService pdfSegmentationService;
|
||||||
|
private final ImageClassificationService imageClassificationService;
|
||||||
|
private final RedactionChangeLogService redactionChangeLogService;
|
||||||
|
private final AnalyzeResponseService analyzeResponseService;
|
||||||
|
|
||||||
|
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
|
||||||
|
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||||
|
|
||||||
|
var pageCount = 0;
|
||||||
|
Document classifiedDoc;
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||||
|
pdDocument.setAllSecurityToBeRemoved(true);
|
||||||
|
pageCount = pdDocument.getNumberOfPages();
|
||||||
|
classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RedactionException(e);
|
||||||
|
}
|
||||||
|
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||||
|
|
||||||
|
imageClassificationService.classifyImages(classifiedDoc);
|
||||||
|
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
|
||||||
|
imageClassificationService.classifyImages(classifiedDoc);
|
||||||
|
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
|
||||||
|
.getRuleSetId());
|
||||||
|
|
||||||
|
log.info("Redaction analysis successful...");
|
||||||
|
|
||||||
|
|
||||||
public ReanalyzeResult reanalyze(@RequestBody RenalyzeRequest renalyzeRequest) {
|
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), classifiedDoc.getDictionaryVersion(), classifiedDoc
|
||||||
|
.getRulesVersion(), analyzeRequest.getRuleSetId());
|
||||||
|
|
||||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), renalyzeRequest
|
// first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to lose changeLog data
|
||||||
.getRedactionLog()
|
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
|
||||||
.getDictionaryVersion());
|
// store redactionLog
|
||||||
|
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||||
|
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc.getSectionText()));
|
||||||
|
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
|
||||||
|
|
||||||
|
return analyzeResponseService.createAnalyzeResponse(pageCount, redactionLog, changeLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest renalyzeRequest) {
|
||||||
|
var text = redactionStorageService.getText(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId());
|
||||||
|
// new procedure was not applied, we need a complete analysis
|
||||||
|
if (text.getNumberOfPages() == 0) {
|
||||||
|
return analyze(AnalyzeRequest.builder()
|
||||||
|
.ruleSetId(renalyzeRequest.getRuleSetId())
|
||||||
|
.manualRedactions(renalyzeRequest.getManualRedactions())
|
||||||
|
.projectId(renalyzeRequest.getProjectId())
|
||||||
|
.fileId(renalyzeRequest.getFileId())
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
var redactionLog = redactionStorageService.getRedactionLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId());
|
||||||
|
|
||||||
|
|
||||||
|
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(renalyzeRequest.getRuleSetId(), redactionLog.getDictionaryVersion());
|
||||||
|
|
||||||
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(renalyzeRequest.getManualRedactions());
|
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(renalyzeRequest.getManualRedactions());
|
||||||
Map<String, List<Comment>> comments = null;
|
Map<String, List<Comment>> comments = null;
|
||||||
@ -75,21 +101,21 @@ public class ReanalyzeService {
|
|||||||
manualAdds = renalyzeRequest.getManualRedactions().getEntriesToAdd();
|
manualAdds = renalyzeRequest.getManualRedactions().getEntriesToAdd();
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<Integer> sectionsToReanaylse = new HashSet<>();
|
Set<Integer> sectionsToReanalyse = new HashSet<>();
|
||||||
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
|
||||||
for (RedactionLogEntry entry : renalyzeRequest.getRedactionLog().getRedactionLogEntry()) {
|
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||||
if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
|
if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
|
||||||
sectionsToReanaylse.add(entry.getSectionNumber());
|
sectionsToReanalyse.add(entry.getSectionNumber());
|
||||||
}
|
}
|
||||||
if (entry.isImage() || entry.getType().equals("image")) {
|
if (entry.isImage() || entry.getType().equals("image")) {
|
||||||
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
|
for (SectionText sectionText : text.getSectionTexts()) {
|
||||||
|
|
||||||
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
|
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
|
||||||
sectionsToReanaylse.add(sectionText.getSectionNumber());
|
sectionsToReanalyse.add(sectionText.getSectionNumber());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (manualAdds != null) {
|
if (manualAdds != null) {
|
||||||
@ -106,97 +132,30 @@ public class ReanalyzeService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sectionsToReanaylse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
|
if (sectionsToReanalyse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
|
||||||
renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
||||||
return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
|
var changeLog = redactionChangeLogService.createAndStoreChangeLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), redactionLog);
|
||||||
|
redactionStorageService.storeObject(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||||
|
return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog);
|
||||||
}
|
}
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(renalyzeRequest.getDocument()))) {
|
try {
|
||||||
|
|
||||||
List<ReanalysisSection> reanalysisSections = new ArrayList<>();
|
List<SectionText> reanalysisSections = new ArrayList<>();
|
||||||
for (SectionText sectionText : renalyzeRequest.getText().getSectionTexts()) {
|
for (SectionText sectionText : text.getSectionTexts()) {
|
||||||
|
|
||||||
if (!sectionsToReanaylse.contains(sectionText.getSectionNumber())) {
|
if (sectionsToReanalyse.contains(sectionText.getSectionNumber())) {
|
||||||
continue;
|
reanalysisSections.add(sectionText);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReanalysisSection reanalysisSection = new ReanalysisSection();
|
|
||||||
reanalysisSection.setHeadline(sectionText.getHeadline());
|
|
||||||
reanalysisSection.setSectionNumber(sectionText.getSectionNumber());
|
|
||||||
List<TextBlock> textBlocks = new ArrayList<>();
|
|
||||||
|
|
||||||
Map<Integer, List<SectionArea>> sectionAreasPerPage = new HashMap<>();
|
|
||||||
for (SectionArea sectionArea : sectionText.getSectionAreas()) {
|
|
||||||
sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>())
|
|
||||||
.add(sectionArea);
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<String, CellValue> tabularData = new HashMap<>();
|
|
||||||
List<Integer> cellStarts = new ArrayList<>();
|
|
||||||
for (Integer page : sectionAreasPerPage.keySet()) {
|
|
||||||
List<SectionArea> areasOnPage = sectionAreasPerPage.get(page);
|
|
||||||
|
|
||||||
PDPage pdPage = pdDocument.getPage(page - 1);
|
|
||||||
PDRectangle cropBox = pdPage.getCropBox();
|
|
||||||
PDFAreaTextStripper textStripper = new PDFAreaTextStripper();
|
|
||||||
textStripper.setPageNumber(page);
|
|
||||||
|
|
||||||
int cellStart = 0;
|
|
||||||
for (SectionArea sectionArea : areasOnPage) {
|
|
||||||
|
|
||||||
Rectangle2D rect = null;
|
|
||||||
if (pdPage.getRotation() == 90) {
|
|
||||||
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft()
|
|
||||||
.getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f);
|
|
||||||
} else {
|
|
||||||
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft()
|
|
||||||
.getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea
|
|
||||||
.getHeight() + 0.001f);
|
|
||||||
}
|
|
||||||
|
|
||||||
textStripper.addRegion(String.valueOf(1), rect);
|
|
||||||
textStripper.extractRegions(pdPage);
|
|
||||||
textStripper.getTextForRegion(String.valueOf(1));
|
|
||||||
List<TextPositionSequence> positions = textStripper.getTextPositionSequences();
|
|
||||||
|
|
||||||
TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft()
|
|
||||||
.getX() + sectionArea.getWidth(), sectionArea.getTopLeft()
|
|
||||||
.getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0);
|
|
||||||
|
|
||||||
if (sectionText.isTable()) {
|
|
||||||
Cell cell = new Cell();
|
|
||||||
cell.addTextBlock(textBlock);
|
|
||||||
tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart));
|
|
||||||
cellStarts.add(cellStart);
|
|
||||||
cellStart = cellStart + cell.toString().trim().length() + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
textBlocks.add(textBlock);
|
|
||||||
textStripper.clearPositions();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
reanalysisSection.setTextBlocks(textBlocks);
|
|
||||||
reanalysisSection.setTabularData(tabularData);
|
|
||||||
|
|
||||||
if (sectionText.isTable()) {
|
|
||||||
reanalysisSection.setCellStarts(cellStarts);
|
|
||||||
}
|
|
||||||
if (imageEntries.containsKey(sectionText.getSectionNumber())) {
|
|
||||||
reanalysisSection.getImages().addAll(imageEntries.get(sectionText.getSectionNumber()));
|
|
||||||
}
|
|
||||||
|
|
||||||
reanalysisSections.add(reanalysisSection);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//--
|
|
||||||
|
|
||||||
KieContainer kieContainer = droolsExecutionService.updateRules(renalyzeRequest.getRuleSetId());
|
KieContainer kieContainer = droolsExecutionService.updateRules(renalyzeRequest.getRuleSetId());
|
||||||
|
|
||||||
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(renalyzeRequest.getRuleSetId());
|
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(renalyzeRequest.getRuleSetId());
|
||||||
|
|
||||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||||
for (ReanalysisSection reanalysisSection : reanalysisSections) {
|
for (SectionText reanalysisSection : reanalysisSections) {
|
||||||
|
|
||||||
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
|
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
|
||||||
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
|
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
|
||||||
@ -254,7 +213,7 @@ public class ReanalyzeService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
|
||||||
for (int page = 1; page <= pdDocument.getNumberOfPages(); page++) {
|
for (int page = 1; page <= text.getNumberOfPages(); page++) {
|
||||||
if (entitiesPerPage.get(page) != null) {
|
if (entitiesPerPage.get(page) != null) {
|
||||||
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest
|
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, renalyzeRequest
|
||||||
.getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
|
.getManualRedactions(), page, renalyzeRequest.getRuleSetId()));
|
||||||
@ -269,19 +228,14 @@ public class ReanalyzeService {
|
|||||||
.getRuleSetId()));
|
.getRuleSetId()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
|
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage());
|
||||||
while (itty.hasNext()) {
|
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
|
||||||
RedactionLogEntry entry = itty.next();
|
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
||||||
if (sectionsToReanaylse.contains(entry.getSectionNumber())) {
|
|
||||||
itty.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
renalyzeRequest.getRedactionLog().getRedactionLogEntry().addAll(newRedactionLogEntries);
|
var changeLog = redactionChangeLogService.createAndStoreChangeLog(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), redactionLog);
|
||||||
|
redactionStorageService.storeObject(renalyzeRequest.getProjectId(), renalyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
|
||||||
|
return analyzeResponseService.createAnalyzeResponse(text.getNumberOfPages(), redactionLog, changeLog);
|
||||||
|
|
||||||
renalyzeRequest.getRedactionLog().setDictionaryVersion(dictionaryIncrement.getDictionaryVersion());
|
|
||||||
|
|
||||||
return ReanalyzeResult.builder().redactionLog(renalyzeRequest.getRedactionLog()).build();
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RedactionException(e);
|
throw new RedactionException(e);
|
||||||
|
|||||||
@ -0,0 +1,94 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.ChangeType;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLog;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.RedactionChangeLogEntry;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class RedactionChangeLogService {
|
||||||
|
|
||||||
|
private final RedactionStorageService storageStorageService;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
|
||||||
|
public RedactionChangeLog createAndStoreChangeLog(String projectId, String fileId, RedactionLog currentRedactionLog) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
RedactionLog previousRedactionLog = storageStorageService.getRedactionLog(projectId, fileId);
|
||||||
|
var changeLog = createChangeLog(currentRedactionLog, previousRedactionLog);
|
||||||
|
storageStorageService.storeObject(projectId, fileId, FileType.REDACTION_CHANGELOG, objectMapper.writeValueAsBytes(changeLog));
|
||||||
|
return changeLog;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Previous redaction log not available");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private RedactionChangeLog createChangeLog(RedactionLog currentRedactionLog, RedactionLog previousRedactionLog) {
|
||||||
|
|
||||||
|
|
||||||
|
if (previousRedactionLog == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<RedactionLogEntry> added = new ArrayList<>(currentRedactionLog.getRedactionLogEntry());
|
||||||
|
added.removeAll(previousRedactionLog.getRedactionLogEntry());
|
||||||
|
|
||||||
|
List<RedactionLogEntry> removed = new ArrayList<>(previousRedactionLog.getRedactionLogEntry());
|
||||||
|
removed.removeAll(currentRedactionLog.getRedactionLogEntry());
|
||||||
|
|
||||||
|
List<RedactionChangeLogEntry> changeLogEntries = added.stream()
|
||||||
|
.map(entry -> convert(entry, ChangeType.ADDED))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
changeLogEntries.addAll(removed.stream()
|
||||||
|
.map(entry -> convert(entry, ChangeType.REMOVED))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
return new RedactionChangeLog(changeLogEntries, currentRedactionLog.getDictionaryVersion(), currentRedactionLog.getRulesVersion(), currentRedactionLog
|
||||||
|
.getRuleSetId(), currentRedactionLog.getFilename());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private RedactionChangeLogEntry convert(RedactionLogEntry entry, ChangeType changeType) {
|
||||||
|
|
||||||
|
return RedactionChangeLogEntry.builder()
|
||||||
|
.id(entry.getId())
|
||||||
|
.type(entry.getType())
|
||||||
|
.value(entry.getValue())
|
||||||
|
.reason(entry.getReason())
|
||||||
|
.matchedRule(entry.getMatchedRule())
|
||||||
|
.legalBasis(entry.getLegalBasis())
|
||||||
|
.redacted(entry.isRedacted())
|
||||||
|
.isHint(entry.isHint())
|
||||||
|
.isRecommendation(entry.isRecommendation())
|
||||||
|
.section(entry.getSection())
|
||||||
|
.color(entry.getColor())
|
||||||
|
.positions(entry.getPositions())
|
||||||
|
.sectionNumber(entry.getSectionNumber())
|
||||||
|
.manual(entry.isManual())
|
||||||
|
.status(entry.getStatus())
|
||||||
|
.manualRedactionType(entry.getManualRedactionType())
|
||||||
|
.isDictionaryEntry(entry.isDictionaryEntry())
|
||||||
|
.textBefore(entry.getTextBefore())
|
||||||
|
.textAfter(entry.getTextAfter())
|
||||||
|
.comments(entry.getComments())
|
||||||
|
.changeType(changeType)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,28 +1,6 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
import org.apache.pdfbox.text.TextPosition;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.CellRectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionType;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Point;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Status;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
@ -34,8 +12,17 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
|||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.apache.pdfbox.text.TextPosition;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
|||||||
@ -1,25 +1,17 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
@SuppressWarnings("PMD")
|
@SuppressWarnings("PMD")
|
||||||
@ -46,7 +38,7 @@ public class EntitySearchUtils {
|
|||||||
|
|
||||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
|
||||||
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||||
if(value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())){
|
if (value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -147,16 +139,16 @@ public class EntitySearchUtils {
|
|||||||
|
|
||||||
public void addEntitiesWithHigherRank(Set<Entity> entities, Entity found, Dictionary dictionary) {
|
public void addEntitiesWithHigherRank(Set<Entity> entities, Entity found, Dictionary dictionary) {
|
||||||
|
|
||||||
if(entities.contains(found)){
|
if (entities.contains(found)) {
|
||||||
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
|
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
|
||||||
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())){
|
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
|
||||||
entities.remove(found);
|
entities.remove(found);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
entities.add(found);
|
entities.add(found);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addEntitiesIgnoreRank(Set<Entity> entities, Set<Entity> found){
|
public void addEntitiesIgnoreRank(Set<Entity> entities, Set<Entity> found) {
|
||||||
// HashSet keeps old value but we want the new.
|
// HashSet keeps old value but we want the new.
|
||||||
entities.removeAll(found);
|
entities.removeAll(found);
|
||||||
entities.addAll(found);
|
entities.addAll(found);
|
||||||
|
|||||||
@ -1,15 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.google.common.hash.HashFunction;
|
import com.google.common.hash.HashFunction;
|
||||||
import com.google.common.hash.Hashing;
|
import com.google.common.hash.Hashing;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class IdBuilder {
|
public class IdBuilder {
|
||||||
|
|
||||||
@ -26,7 +25,7 @@ public class IdBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String buildId(Rectangle2D rectangle2D, int page){
|
public String buildId(Rectangle2D rectangle2D, int page) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append("x").append(rectangle2D.getX()).append("y").append(rectangle2D.getY()).append("h").append(rectangle2D.getHeight()).append("w").append(rectangle2D.getWidth()).append("p").append(page);
|
sb.append("x").append(rectangle2D.getX()).append("y").append(rectangle2D.getY()).append("h").append(rectangle2D.getHeight()).append("w").append(rectangle2D.getWidth()).append("p").append(page);
|
||||||
@ -35,5 +34,4 @@ public class IdBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,7 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
@ -8,8 +10,6 @@ import java.nio.charset.StandardCharsets;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
|
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class ResourceLoader {
|
public class ResourceLoader {
|
||||||
|
|
||||||
@ -27,4 +27,4 @@ public class ResourceLoader {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,6 +7,7 @@ public class TextNormalizationUtilities {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Revert hyphenation due to line breaks.
|
* Revert hyphenation due to line breaks.
|
||||||
|
*
|
||||||
* @param text Text to be processed.
|
* @param text Text to be processed.
|
||||||
* @return Text without line-break hyphenation.
|
* @return Text without line-break hyphenation.
|
||||||
*/
|
*/
|
||||||
@ -14,4 +15,4 @@ public class TextNormalizationUtilities {
|
|||||||
return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R|\n\r(.+ )", "$1$2");
|
return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R|\n\r(.+ )", "$1$2");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,28 +1,36 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||||
|
|
||||||
import java.io.IOException;
|
import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||||
import java.util.ArrayList;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import java.util.List;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.PDFAreaTextStripper;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import java.awt.geom.Rectangle2D;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import java.io.IOException;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import java.util.ArrayList;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
import java.util.HashMap;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
|
import java.util.List;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
import java.util.Map;
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@ -36,6 +44,82 @@ public class PdfSegmentationService {
|
|||||||
private final SectionsBuilderService sectionsBuilderService;
|
private final SectionsBuilderService sectionsBuilderService;
|
||||||
|
|
||||||
|
|
||||||
|
private final RedactionStorageService redactionStorageService;
|
||||||
|
|
||||||
|
|
||||||
|
private void postProcessSections(PDDocument pdDocument, List<SectionText> texts) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (SectionText sectionText : texts) {
|
||||||
|
|
||||||
|
List<TextBlock> textBlocks = new ArrayList<>();
|
||||||
|
|
||||||
|
Map<Integer, List<SectionArea>> sectionAreasPerPage = new HashMap<>();
|
||||||
|
for (SectionArea sectionArea : sectionText.getSectionAreas()) {
|
||||||
|
sectionAreasPerPage.computeIfAbsent(sectionArea.getPage(), (x) -> new ArrayList<>())
|
||||||
|
.add(sectionArea);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, CellValue> tabularData = new HashMap<>();
|
||||||
|
List<Integer> cellStarts = new ArrayList<>();
|
||||||
|
for (Integer page : sectionAreasPerPage.keySet()) {
|
||||||
|
List<SectionArea> areasOnPage = sectionAreasPerPage.get(page);
|
||||||
|
|
||||||
|
PDPage pdPage = pdDocument.getPage(page - 1);
|
||||||
|
PDRectangle cropBox = pdPage.getCropBox();
|
||||||
|
PDFAreaTextStripper textStripper = new PDFAreaTextStripper();
|
||||||
|
textStripper.setPageNumber(page);
|
||||||
|
|
||||||
|
int cellStart = 0;
|
||||||
|
for (SectionArea sectionArea : areasOnPage) {
|
||||||
|
|
||||||
|
Rectangle2D rect = null;
|
||||||
|
if (pdPage.getRotation() == 90) {
|
||||||
|
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getY(), sectionArea.getTopLeft()
|
||||||
|
.getX(), sectionArea.getHeight(), sectionArea.getWidth() + 0.001f);
|
||||||
|
} else {
|
||||||
|
rect = new Rectangle2D.Float(sectionArea.getTopLeft().getX(), -sectionArea.getTopLeft()
|
||||||
|
.getY() + cropBox.getUpperRightY() - sectionArea.getHeight(), sectionArea.getWidth(), sectionArea
|
||||||
|
.getHeight() + 0.001f);
|
||||||
|
}
|
||||||
|
|
||||||
|
textStripper.addRegion(String.valueOf(1), rect);
|
||||||
|
textStripper.extractRegions(pdPage);
|
||||||
|
textStripper.getTextForRegion(String.valueOf(1));
|
||||||
|
List<TextPositionSequence> positions = textStripper.getTextPositionSequences();
|
||||||
|
|
||||||
|
TextBlock textBlock = new TextBlock(sectionArea.getTopLeft().getX(), sectionArea.getTopLeft()
|
||||||
|
.getX() + sectionArea.getWidth(), sectionArea.getTopLeft()
|
||||||
|
.getY(), sectionArea.getTopLeft().getY() + sectionArea.getHeight(), positions, 0);
|
||||||
|
|
||||||
|
if (sectionText.isTable()) {
|
||||||
|
Cell cell = new Cell();
|
||||||
|
cell.addTextBlock(textBlock);
|
||||||
|
tabularData.put(sectionArea.getHeader(), new CellValue(cell.getTextBlocks(), cellStart));
|
||||||
|
cellStarts.add(cellStart);
|
||||||
|
cellStart = cellStart + cell.toString().trim().length() + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
textBlocks.add(textBlock);
|
||||||
|
textStripper.clearPositions();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
sectionText.setTextBlocks(textBlocks);
|
||||||
|
sectionText.setTabularData(tabularData);
|
||||||
|
if (sectionText.isTable()) {
|
||||||
|
sectionText.setCellStarts(cellStarts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RedactionException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Document parseDocument(PDDocument pdDocument) throws IOException {
|
public Document parseDocument(PDDocument pdDocument) throws IOException {
|
||||||
|
|
||||||
Document document = new Document();
|
Document document = new Document();
|
||||||
@ -82,6 +166,9 @@ public class PdfSegmentationService {
|
|||||||
sectionsBuilderService.buildSections(document);
|
sectionsBuilderService.buildSections(document);
|
||||||
sectionsBuilderService.addImagesToSections(document);
|
sectionsBuilderService.addImagesToSections(document);
|
||||||
|
|
||||||
|
// This can be improved and done in one pass, but it's complicated to do right away
|
||||||
|
postProcessSections(pdDocument, document.getSectionText());
|
||||||
|
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,4 +203,4 @@ public class PdfSegmentationService {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,29 +1,15 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import com.iqser.red.service.redaction.v1.server.classification.model.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.SortedSet;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
public class SectionsBuilderService {
|
public class SectionsBuilderService {
|
||||||
@ -302,4 +288,4 @@ public class SectionsBuilderService {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,17 +1,16 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.settings;
|
package com.iqser.red.service.redaction.v1.server.settings;
|
||||||
|
|
||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@ConfigurationProperties("redaction-service")
|
@ConfigurationProperties("redaction-service")
|
||||||
public class RedactionServiceSettings {
|
public class RedactionServiceSettings {
|
||||||
|
|
||||||
private int numberOfSurroundingWords = 3;
|
private int numberOfSurroundingWords = 3;
|
||||||
|
|
||||||
private int surroundingWordsOffsetWindow = 100;
|
private int surroundingWordsOffsetWindow = 100;
|
||||||
|
|
||||||
private boolean enableImageClassification = true;
|
private boolean enableImageClassification = true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,109 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.storage;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||||
|
import com.iqser.red.service.redaction.v1.model.SectionGrid;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
|
||||||
|
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
|
||||||
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.core.io.InputStreamResource;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class RedactionStorageService {
|
||||||
|
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final StorageService storageService;
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public InputStream getStoredObject(String storageId) {
|
||||||
|
return storageService.getObject(storageId).getInputStream();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
public void storeObject(String projectId, String fileId, FileType fileType, Object any) {
|
||||||
|
storageService.storeObject(StorageIdUtils.getStorageId(projectId, fileId, fileType), objectMapper.writeValueAsBytes(any));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public RedactionLog getRedactionLog(String projectId, String fileId) {
|
||||||
|
|
||||||
|
InputStreamResource inputStreamResource;
|
||||||
|
try {
|
||||||
|
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.REDACTION_LOG));
|
||||||
|
} catch (StorageObjectDoesNotExist e) {
|
||||||
|
log.debug("Text not available.");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return objectMapper.readValue(inputStreamResource.getInputStream(), RedactionLog.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Could not convert Text", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Text getText(String projectId, String fileId) {
|
||||||
|
|
||||||
|
InputStreamResource inputStreamResource;
|
||||||
|
try {
|
||||||
|
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.TEXT));
|
||||||
|
} catch (StorageObjectDoesNotExist e) {
|
||||||
|
log.debug("Text not available.");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return objectMapper.readValue(inputStreamResource.getInputStream(), Text.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Could not convert Text", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public SectionGrid getSectionGrid(String projectId, String fileId) {
|
||||||
|
|
||||||
|
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.SECTION_GRID));
|
||||||
|
try {
|
||||||
|
return objectMapper.readValue(sectionGrid.getInputStream(), SectionGrid.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Could not convert RedactionLog", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public enum StorageType {
|
||||||
|
PARSED_DOCUMENT(".json");
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final String extension;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class StorageIdUtils {
|
||||||
|
|
||||||
|
public static String getStorageId(String projectId, String fileId, FileType fileType) {
|
||||||
|
return projectId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static String getStorageId(String userId, String projectId, String filename) {
|
||||||
|
|
||||||
|
return userId + "/" + projectId + "/" + filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,7 +1,6 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
@ -25,7 +24,7 @@ public abstract class AbstractTextContainer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains(Rectangle other) {
|
public boolean contains(Rectangle other) {
|
||||||
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
|
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft().getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
|
||||||
}
|
}
|
||||||
|
|
||||||
public float getHeight() {
|
public float getHeight() {
|
||||||
@ -36,4 +35,4 @@ public abstract class AbstractTextContainer {
|
|||||||
return maxX - minX;
|
return maxX - minX;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,18 +1,17 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
import java.awt.geom.Point2D;
|
import java.awt.geom.Point2D;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
|
||||||
|
|
||||||
import lombok.Data;
|
|
||||||
import lombok.EqualsAndHashCode;
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
|
|
||||||
@SuppressWarnings("serial")
|
@SuppressWarnings("serial")
|
||||||
@Data
|
@Data
|
||||||
@EqualsAndHashCode(callSuper = true)
|
@EqualsAndHashCode(callSuper = true)
|
||||||
@ -71,7 +70,4 @@ public class Cell extends Rectangle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
public class CleanRulings {
|
public class CleanRulings {
|
||||||
|
|||||||
@ -8,170 +8,171 @@ import java.util.List;
|
|||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public class Rectangle extends Rectangle2D.Float {
|
public class Rectangle extends Rectangle2D.Float {
|
||||||
|
|
||||||
/**
|
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
|
||||||
* Ill-defined comparator, from when Rectangle was Comparable.
|
/**
|
||||||
*
|
* Ill-defined comparator, from when Rectangle was Comparable.
|
||||||
* see https://github.com/tabulapdf/tabula-java/issues/116
|
* <p>
|
||||||
* @deprecated with no replacement
|
* see https://github.com/tabulapdf/tabula-java/issues/116
|
||||||
*/
|
*
|
||||||
@Deprecated
|
* @deprecated with no replacement
|
||||||
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
|
*/
|
||||||
@Override public int compare(Rectangle o1, Rectangle o2) {
|
@Deprecated
|
||||||
if (o1.equals(o2)) return 0;
|
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
|
||||||
if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
|
@Override
|
||||||
return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
|
public int compare(Rectangle o1, Rectangle o2) {
|
||||||
? - java.lang.Double.compare(o1.getX(), o2.getX())
|
if (o1.equals(o2)) return 0;
|
||||||
: java.lang.Double.compare(o1.getX(), o2.getX());
|
if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
|
||||||
} else {
|
return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
|
||||||
return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
|
? -java.lang.Double.compare(o1.getX(), o2.getX())
|
||||||
}
|
: java.lang.Double.compare(o1.getX(), o2.getX());
|
||||||
}
|
} else {
|
||||||
};
|
return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
|
||||||
|
}
|
||||||
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
|
}
|
||||||
|
};
|
||||||
|
|
||||||
public Rectangle() {
|
public Rectangle() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Rectangle(float top, float left, float width, float height) {
|
public Rectangle(float top, float left, float width, float height) {
|
||||||
super();
|
super();
|
||||||
this.setRect(left, top, width, height);
|
this.setRect(left, top, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int compareTo(Rectangle other) {
|
/**
|
||||||
return ILL_DEFINED_ORDER.compare(this, other);
|
* @param rectangles
|
||||||
}
|
* @return minimum bounding box that contains all the rectangles
|
||||||
|
*/
|
||||||
|
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
|
||||||
|
float minx = java.lang.Float.MAX_VALUE;
|
||||||
|
float miny = java.lang.Float.MAX_VALUE;
|
||||||
|
float maxx = java.lang.Float.MIN_VALUE;
|
||||||
|
float maxy = java.lang.Float.MIN_VALUE;
|
||||||
|
|
||||||
// I'm bad at Java and need this for fancy sorting in
|
for (Rectangle r : rectangles) {
|
||||||
// technology.tabula.TextChunk.
|
minx = (float) Math.min(r.getMinX(), minx);
|
||||||
public int isLtrDominant() {
|
miny = (float) Math.min(r.getMinY(), miny);
|
||||||
return 0;
|
maxx = (float) Math.max(r.getMaxX(), maxx);
|
||||||
}
|
maxy = (float) Math.max(r.getMaxY(), maxy);
|
||||||
|
}
|
||||||
|
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
|
||||||
|
}
|
||||||
|
|
||||||
public float getArea() {
|
public int compareTo(Rectangle other) {
|
||||||
return this.width * this.height;
|
return ILL_DEFINED_ORDER.compare(this, other);
|
||||||
}
|
}
|
||||||
|
|
||||||
public float verticalOverlap(Rectangle other) {
|
// I'm bad at Java and need this for fancy sorting in
|
||||||
return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
// technology.tabula.TextChunk.
|
||||||
}
|
public int isLtrDominant() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean verticallyOverlaps(Rectangle other) {
|
public float getArea() {
|
||||||
return verticalOverlap(other) > 0;
|
return this.width * this.height;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float horizontalOverlap(Rectangle other) {
|
public float verticalOverlap(Rectangle other) {
|
||||||
return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean horizontallyOverlaps(Rectangle other) {
|
public boolean verticallyOverlaps(Rectangle other) {
|
||||||
return horizontalOverlap(other) > 0;
|
return verticalOverlap(other) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float verticalOverlapRatio(Rectangle other) {
|
public float horizontalOverlap(Rectangle other) {
|
||||||
float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
|
return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||||
|
}
|
||||||
|
|
||||||
if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
|
public boolean horizontallyOverlaps(Rectangle other) {
|
||||||
&& other.getBottom() <= this.getBottom()) {
|
return horizontalOverlap(other) > 0;
|
||||||
rv = (other.getBottom() - this.getTop()) / delta;
|
}
|
||||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
|
|
||||||
&& this.getBottom() <= other.getBottom()) {
|
|
||||||
rv = (this.getBottom() - other.getTop()) / delta;
|
|
||||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
|
|
||||||
&& other.getBottom() <= this.getBottom()) {
|
|
||||||
rv = (other.getBottom() - other.getTop()) / delta;
|
|
||||||
} else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
|
|
||||||
&& this.getBottom() <= other.getBottom()) {
|
|
||||||
rv = (this.getBottom() - this.getTop()) / delta;
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv;
|
public float verticalOverlapRatio(Rectangle other) {
|
||||||
|
float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
|
||||||
|
|
||||||
}
|
if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
|
||||||
|
&& other.getBottom() <= this.getBottom()) {
|
||||||
|
rv = (other.getBottom() - this.getTop()) / delta;
|
||||||
|
} else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
|
||||||
|
&& this.getBottom() <= other.getBottom()) {
|
||||||
|
rv = (this.getBottom() - other.getTop()) / delta;
|
||||||
|
} else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
|
||||||
|
&& other.getBottom() <= this.getBottom()) {
|
||||||
|
rv = (other.getBottom() - other.getTop()) / delta;
|
||||||
|
} else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
|
||||||
|
&& this.getBottom() <= other.getBottom()) {
|
||||||
|
rv = (this.getBottom() - this.getTop()) / delta;
|
||||||
|
}
|
||||||
|
|
||||||
public float overlapRatio(Rectangle other) {
|
return rv;
|
||||||
double intersectionWidth = Math.max(0,
|
|
||||||
Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
|
||||||
double intersectionHeight = Math.max(0,
|
|
||||||
Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
|
||||||
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
|
|
||||||
double unionArea = this.getArea() + other.getArea() - intersectionArea;
|
|
||||||
|
|
||||||
return (float) (intersectionArea / unionArea);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public Rectangle merge(Rectangle other) {
|
public float overlapRatio(Rectangle other) {
|
||||||
this.setRect(this.createUnion(other));
|
double intersectionWidth = Math.max(0,
|
||||||
return this;
|
Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||||
}
|
double intersectionHeight = Math.max(0,
|
||||||
|
Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||||
|
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
|
||||||
|
double unionArea = this.getArea() + other.getArea() - intersectionArea;
|
||||||
|
|
||||||
public float getTop() {
|
return (float) (intersectionArea / unionArea);
|
||||||
return (float) this.getMinY();
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public void setTop(float top) {
|
public Rectangle merge(Rectangle other) {
|
||||||
float deltaHeight = top - this.y;
|
this.setRect(this.createUnion(other));
|
||||||
this.setRect(this.x, top, this.width, this.height - deltaHeight);
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float getRight() {
|
public float getTop() {
|
||||||
return (float) this.getMaxX();
|
return (float) this.getMinY();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setRight(float right) {
|
public void setTop(float top) {
|
||||||
this.setRect(this.x, this.y, right - this.x, this.height);
|
float deltaHeight = top - this.y;
|
||||||
}
|
this.setRect(this.x, top, this.width, this.height - deltaHeight);
|
||||||
|
}
|
||||||
|
|
||||||
public float getLeft() {
|
public float getRight() {
|
||||||
return (float) this.getMinX();
|
return (float) this.getMaxX();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLeft(float left) {
|
public void setRight(float right) {
|
||||||
float deltaWidth = left - this.x;
|
this.setRect(this.x, this.y, right - this.x, this.height);
|
||||||
this.setRect(left, this.y, this.width - deltaWidth, this.height);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public float getBottom() {
|
public float getLeft() {
|
||||||
return (float) this.getMaxY();
|
return (float) this.getMinX();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setBottom(float bottom) {
|
public void setLeft(float left) {
|
||||||
this.setRect(this.x, this.y, this.width, bottom - this.y);
|
float deltaWidth = left - this.x;
|
||||||
}
|
this.setRect(left, this.y, this.width - deltaWidth, this.height);
|
||||||
|
}
|
||||||
|
|
||||||
public Point2D[] getPoints() {
|
public float getBottom() {
|
||||||
return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()),
|
return (float) this.getMaxY();
|
||||||
new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
|
}
|
||||||
new Point2D.Float(this.getLeft(), this.getBottom()) };
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
public void setBottom(float bottom) {
|
||||||
public String toString() {
|
this.setRect(this.x, this.y, this.width, bottom - this.y);
|
||||||
StringBuilder sb = new StringBuilder();
|
}
|
||||||
String s = super.toString();
|
|
||||||
sb.append(s.substring(0, s.length() - 1));
|
|
||||||
sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
public Point2D[] getPoints() {
|
||||||
* @param rectangles
|
return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()),
|
||||||
* @return minimum bounding box that contains all the rectangles
|
new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
|
||||||
*/
|
new Point2D.Float(this.getLeft(), this.getBottom())};
|
||||||
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
|
}
|
||||||
float minx = java.lang.Float.MAX_VALUE;
|
|
||||||
float miny = java.lang.Float.MAX_VALUE;
|
|
||||||
float maxx = java.lang.Float.MIN_VALUE;
|
|
||||||
float maxy = java.lang.Float.MIN_VALUE;
|
|
||||||
|
|
||||||
for (Rectangle r : rectangles) {
|
@Override
|
||||||
minx = (float) Math.min(r.getMinX(), minx);
|
public String toString() {
|
||||||
miny = (float) Math.min(r.getMinY(), miny);
|
StringBuilder sb = new StringBuilder();
|
||||||
maxx = (float) Math.max(r.getMaxX(), maxx);
|
String s = super.toString();
|
||||||
maxy = (float) Math.max(r.getMaxY(), maxy);
|
sb.append(s.substring(0, s.length() - 1));
|
||||||
}
|
sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
|
||||||
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.locationtech.jts.geom.Envelope;
|
import org.locationtech.jts.geom.Envelope;
|
||||||
import org.locationtech.jts.index.strtree.STRtree;
|
import org.locationtech.jts.index.strtree.STRtree;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public class RectangleSpatialIndex<T extends Rectangle> {
|
public class RectangleSpatialIndex<T extends Rectangle> {
|
||||||
|
|||||||
@ -1,20 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import java.awt.geom.Line2D;
|
import java.awt.geom.Line2D;
|
||||||
import java.awt.geom.Point2D;
|
import java.awt.geom.Point2D;
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Formatter;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@ -23,13 +16,127 @@ public class Ruling extends Line2D.Float {
|
|||||||
|
|
||||||
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
|
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
|
||||||
|
|
||||||
private enum SOType {VERTICAL, HRIGHT, HLEFT}
|
|
||||||
|
|
||||||
|
|
||||||
public Ruling(Point2D p1, Point2D p2) {
|
public Ruling(Point2D p1, Point2D p2) {
|
||||||
super(p1, p2);
|
super(p1, p2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
|
||||||
|
ArrayList<Ruling> rv = new ArrayList<>();
|
||||||
|
for (Ruling r : rulings) {
|
||||||
|
if (r.intersects(area)) {
|
||||||
|
rv.add(r.intersect(area));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
// log(n) implementation of find_intersections
|
||||||
|
// based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
|
||||||
|
public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||||
|
|
||||||
|
class SortObject {
|
||||||
|
protected SOType type;
|
||||||
|
protected float position;
|
||||||
|
protected Ruling ruling;
|
||||||
|
|
||||||
|
public SortObject(SOType type, float position, Ruling ruling) {
|
||||||
|
this.type = type;
|
||||||
|
this.position = position;
|
||||||
|
this.ruling = ruling;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<SortObject> sos = new ArrayList<>();
|
||||||
|
|
||||||
|
TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Ruling o1, Ruling o2) {
|
||||||
|
return java.lang.Double.compare(o1.getTop(), o2.getTop());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Point2D o1, Point2D o2) {
|
||||||
|
if (o1.getY() > o2.getY()) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (o1.getY() < o2.getY()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (o1.getX() > o2.getX()) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (o1.getX() < o2.getX()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for (Ruling h : horizontals) {
|
||||||
|
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||||
|
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Ruling v : verticals) {
|
||||||
|
sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort(sos, new Comparator<SortObject>() {
|
||||||
|
@Override
|
||||||
|
public int compare(SortObject a, SortObject b) {
|
||||||
|
int rv;
|
||||||
|
if (Utils.feq(a.position, b.position)) {
|
||||||
|
if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
|
||||||
|
rv = 1;
|
||||||
|
} else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
|
||||||
|
rv = -1;
|
||||||
|
} else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
|
||||||
|
rv = -1;
|
||||||
|
} else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
|
||||||
|
rv = 1;
|
||||||
|
} else {
|
||||||
|
rv = java.lang.Double.compare(a.position, b.position);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return java.lang.Double.compare(a.position, b.position);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for (SortObject so : sos) {
|
||||||
|
switch (so.type) {
|
||||||
|
case VERTICAL:
|
||||||
|
for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
|
||||||
|
try {
|
||||||
|
Point2D i = h.getKey().intersectionPoint(so.ruling);
|
||||||
|
if (i == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rv.put(i,
|
||||||
|
new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
|
||||||
|
so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
|
||||||
|
} catch (UnsupportedOperationException e) {
|
||||||
|
log.info("Some line are oblique, ignoring...");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HRIGHT:
|
||||||
|
tree.remove(so.ruling);
|
||||||
|
break;
|
||||||
|
case HLEFT:
|
||||||
|
tree.put(so.ruling, true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public boolean vertical() {
|
public boolean vertical() {
|
||||||
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||||
}
|
}
|
||||||
@ -38,13 +145,13 @@ public class Ruling extends Line2D.Float {
|
|||||||
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// attributes that make sense only for non-oblique lines
|
||||||
|
// these are used to have a single collapse method (in page, currently)
|
||||||
|
|
||||||
public boolean oblique() {
|
public boolean oblique() {
|
||||||
return !(this.vertical() || this.horizontal());
|
return !(this.vertical() || this.horizontal());
|
||||||
}
|
}
|
||||||
|
|
||||||
// attributes that make sense only for non-oblique lines
|
|
||||||
// these are used to have a single collapse method (in page, currently)
|
|
||||||
|
|
||||||
public float getPosition() {
|
public float getPosition() {
|
||||||
if (this.oblique()) {
|
if (this.oblique()) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
@ -52,7 +159,6 @@ public class Ruling extends Line2D.Float {
|
|||||||
return this.vertical() ? this.getLeft() : this.getTop();
|
return this.vertical() ? this.getLeft() : this.getTop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public float getStart() {
|
public float getStart() {
|
||||||
if (this.oblique()) {
|
if (this.oblique()) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
@ -102,12 +208,10 @@ public class Ruling extends Line2D.Float {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean perpendicularTo(Ruling other) {
|
public boolean perpendicularTo(Ruling other) {
|
||||||
return this.vertical() == other.horizontal();
|
return this.vertical() == other.horizontal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
|
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
|
||||||
if (this.intersectsLine(another)) {
|
if (this.intersectsLine(another)) {
|
||||||
return true;
|
return true;
|
||||||
@ -238,7 +342,6 @@ public class Ruling extends Line2D.Float {
|
|||||||
return angle;
|
return angle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
@ -248,122 +351,7 @@ public class Ruling extends Line2D.Float {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
|
private enum SOType {VERTICAL, HRIGHT, HLEFT}
|
||||||
ArrayList<Ruling> rv = new ArrayList<>();
|
|
||||||
for (Ruling r : rulings) {
|
|
||||||
if (r.intersects(area)) {
|
|
||||||
rv.add(r.intersect(area));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
// log(n) implementation of find_intersections
|
|
||||||
// based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
|
|
||||||
public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
|
|
||||||
|
|
||||||
class SortObject {
|
|
||||||
protected SOType type;
|
|
||||||
protected float position;
|
|
||||||
protected Ruling ruling;
|
|
||||||
|
|
||||||
public SortObject(SOType type, float position, Ruling ruling) {
|
|
||||||
this.type = type;
|
|
||||||
this.position = position;
|
|
||||||
this.ruling = ruling;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
List<SortObject> sos = new ArrayList<>();
|
|
||||||
|
|
||||||
TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
|
|
||||||
@Override
|
|
||||||
public int compare(Ruling o1, Ruling o2) {
|
|
||||||
return java.lang.Double.compare(o1.getTop(), o2.getTop());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
|
|
||||||
@Override
|
|
||||||
public int compare(Point2D o1, Point2D o2) {
|
|
||||||
if (o1.getY() > o2.getY()) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (o1.getY() < o2.getY()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (o1.getX() > o2.getX()) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (o1.getX() < o2.getX()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
for (Ruling h : horizontals) {
|
|
||||||
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
|
||||||
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Ruling v : verticals) {
|
|
||||||
sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
|
|
||||||
}
|
|
||||||
|
|
||||||
Collections.sort(sos, new Comparator<SortObject>() {
|
|
||||||
@Override
|
|
||||||
public int compare(SortObject a, SortObject b) {
|
|
||||||
int rv;
|
|
||||||
if (Utils.feq(a.position, b.position)) {
|
|
||||||
if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
|
|
||||||
rv = 1;
|
|
||||||
} else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
|
|
||||||
rv = -1;
|
|
||||||
} else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
|
|
||||||
rv = -1;
|
|
||||||
} else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
|
|
||||||
rv = 1;
|
|
||||||
} else {
|
|
||||||
rv = java.lang.Double.compare(a.position, b.position);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return java.lang.Double.compare(a.position, b.position);
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
for (SortObject so : sos) {
|
|
||||||
switch (so.type) {
|
|
||||||
case VERTICAL:
|
|
||||||
for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
|
|
||||||
try {
|
|
||||||
Point2D i = h.getKey().intersectionPoint(so.ruling);
|
|
||||||
if (i == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
rv.put(i,
|
|
||||||
new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
|
|
||||||
so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
|
|
||||||
} catch(UnsupportedOperationException e){
|
|
||||||
log.info("Some line are oblique, ignoring...");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case HRIGHT:
|
|
||||||
tree.remove(so.ruling);
|
|
||||||
break;
|
|
||||||
case HLEFT:
|
|
||||||
tree.put(so.ruling, true);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,22 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
import org.apache.commons.collections4.CollectionUtils;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class Table extends AbstractTextContainer {
|
public class Table extends AbstractTextContainer {
|
||||||
@ -24,21 +15,14 @@ public class Table extends AbstractTextContainer {
|
|||||||
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
||||||
|
|
||||||
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
private final RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||||
|
private final int rotation;
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
private String headline;
|
private String headline;
|
||||||
|
|
||||||
private int unrotatedRowCount;
|
private int unrotatedRowCount;
|
||||||
|
|
||||||
private int unrotatedColCount;
|
private int unrotatedColCount;
|
||||||
|
|
||||||
private int rowCount = -1;
|
private int rowCount = -1;
|
||||||
|
|
||||||
private int colCount = -1;
|
private int colCount = -1;
|
||||||
|
|
||||||
private final int rotation;
|
|
||||||
|
|
||||||
private List<List<Cell>> rows;
|
private List<List<Cell>> rows;
|
||||||
|
|
||||||
|
|
||||||
@ -62,8 +46,8 @@ public class Table extends AbstractTextContainer {
|
|||||||
|
|
||||||
// Ignore rows that does not contain any cells and values.
|
// Ignore rows that does not contain any cells and values.
|
||||||
List<List<Cell>> rowsToRemove = new ArrayList<>();
|
List<List<Cell>> rowsToRemove = new ArrayList<>();
|
||||||
for (List<Cell> row: rows){
|
for (List<Cell> row : rows) {
|
||||||
if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()){
|
if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) {
|
||||||
rowsToRemove.add(row);
|
rowsToRemove.add(row);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -110,7 +94,7 @@ public class Table extends AbstractTextContainer {
|
|||||||
// we move from left to right and top to bottom
|
// we move from left to right and top to bottom
|
||||||
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
||||||
List<Cell> rowCells = rows.get(rowIndex);
|
List<Cell> rowCells = rows.get(rowIndex);
|
||||||
if(rowCells.size() == 1){
|
if (rowCells.size() == 1) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,7 +259,7 @@ public class Table extends AbstractTextContainer {
|
|||||||
|
|
||||||
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2),
|
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2),
|
||||||
Utils.round(arg1
|
Utils.round(arg1
|
||||||
.getBottom(), 2))));
|
.getBottom(), 2))));
|
||||||
|
|
||||||
Iterator<Cell> iter = cells.iterator();
|
Iterator<Cell> iter = cells.iterator();
|
||||||
Cell c = iter.next();
|
Cell c = iter.next();
|
||||||
@ -367,4 +351,4 @@ public class Table extends AbstractTextContainer {
|
|||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,19 +1,13 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||||
|
|
||||||
import java.awt.geom.Line2D;
|
|
||||||
import java.awt.geom.Point2D;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.awt.geom.Line2D;
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
public class RulingCleaningService {
|
public class RulingCleaningService {
|
||||||
|
|||||||
@ -1,31 +1,57 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||||
|
|
||||||
import java.awt.geom.Point2D;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.*;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
public class TableExtractionService {
|
public class TableExtractionService {
|
||||||
|
|
||||||
|
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
|
||||||
|
|
||||||
|
int rv = 0;
|
||||||
|
float arg0X = Utils.round(arg0.getX(), 2);
|
||||||
|
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||||
|
float arg1X = Utils.round(arg1.getX(), 2);
|
||||||
|
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||||
|
|
||||||
|
if (arg0X > arg1X) {
|
||||||
|
rv = 1;
|
||||||
|
} else if (arg0X < arg1X) {
|
||||||
|
rv = -1;
|
||||||
|
} else if (arg0Y > arg1Y) {
|
||||||
|
rv = 1;
|
||||||
|
} else if (arg0Y < arg1Y) {
|
||||||
|
rv = -1;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
};
|
||||||
|
private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
|
||||||
|
|
||||||
|
int rv = 0;
|
||||||
|
float arg0X = Utils.round(arg0.getX(), 2);
|
||||||
|
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||||
|
float arg1X = Utils.round(arg1.getX(), 2);
|
||||||
|
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||||
|
|
||||||
|
if (arg0Y > arg1Y) {
|
||||||
|
rv = 1;
|
||||||
|
} else if (arg0Y < arg1Y) {
|
||||||
|
rv = -1;
|
||||||
|
} else if (arg0X > arg1X) {
|
||||||
|
rv = 1;
|
||||||
|
} else if (arg0X < arg1X) {
|
||||||
|
rv = -1;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
};
|
||||||
|
|
||||||
public void extractTables(CleanRulings cleanRulings, Page page) {
|
public void extractTables(CleanRulings cleanRulings, Page page) {
|
||||||
|
|
||||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||||
@ -80,7 +106,6 @@ public class TableExtractionService {
|
|||||||
page.getTextBlocks().removeAll(toBeRemoved);
|
page.getTextBlocks().removeAll(toBeRemoved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||||
|
|
||||||
List<Cell> cellsFound = new ArrayList<>();
|
List<Cell> cellsFound = new ArrayList<>();
|
||||||
@ -133,7 +158,6 @@ public class TableExtractionService {
|
|||||||
return cellsFound;
|
return cellsFound;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
private List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||||
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
||||||
List<Rectangle> rectangles = new ArrayList<>();
|
List<Rectangle> rectangles = new ArrayList<>();
|
||||||
@ -233,47 +257,6 @@ public class TableExtractionService {
|
|||||||
return rectangles;
|
return rectangles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
|
|
||||||
|
|
||||||
int rv = 0;
|
|
||||||
float arg0X = Utils.round(arg0.getX(), 2);
|
|
||||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
|
||||||
float arg1X = Utils.round(arg1.getX(), 2);
|
|
||||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
|
||||||
|
|
||||||
if (arg0X > arg1X) {
|
|
||||||
rv = 1;
|
|
||||||
} else if (arg0X < arg1X) {
|
|
||||||
rv = -1;
|
|
||||||
} else if (arg0Y > arg1Y) {
|
|
||||||
rv = 1;
|
|
||||||
} else if (arg0Y < arg1Y) {
|
|
||||||
rv = -1;
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
};
|
|
||||||
|
|
||||||
private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
|
|
||||||
|
|
||||||
int rv = 0;
|
|
||||||
float arg0X = Utils.round(arg0.getX(), 2);
|
|
||||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
|
||||||
float arg1X = Utils.round(arg1.getX(), 2);
|
|
||||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
|
||||||
|
|
||||||
if (arg0Y > arg1Y) {
|
|
||||||
rv = 1;
|
|
||||||
} else if (arg0Y < arg1Y) {
|
|
||||||
rv = -1;
|
|
||||||
} else if (arg0X > arg1X) {
|
|
||||||
rv = 1;
|
|
||||||
} else if (arg0X < arg1X) {
|
|
||||||
rv = -1;
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
};
|
|
||||||
|
|
||||||
private enum Direction {
|
private enum Direction {
|
||||||
HORIZONTAL, VERTICAL
|
HORIZONTAL, VERTICAL
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,21 +19,24 @@ import java.awt.geom.Rectangle2D;
|
|||||||
* clipping algorithm (line against clip rectangle).
|
* clipping algorithm (line against clip rectangle).
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public final class CohenSutherlandClipping
|
public final class CohenSutherlandClipping {
|
||||||
{
|
private static final int INSIDE = 0;
|
||||||
|
private static final int LEFT = 1;
|
||||||
|
private static final int RIGHT = 2;
|
||||||
|
private static final int BOTTOM = 4;
|
||||||
|
private static final int TOP = 8;
|
||||||
private double xMin;
|
private double xMin;
|
||||||
private double yMin;
|
private double yMin;
|
||||||
private double xMax;
|
private double xMax;
|
||||||
private double yMax;
|
private double yMax;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
|
* Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
|
||||||
*/
|
*/
|
||||||
public CohenSutherlandClipping() {
|
public CohenSutherlandClipping() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a Cohen Sutherland clipper with the given clip rectangle.
|
* Creates a Cohen Sutherland clipper with the given clip rectangle.
|
||||||
|
*
|
||||||
* @param clip the clip rectangle to use
|
* @param clip the clip rectangle to use
|
||||||
*/
|
*/
|
||||||
public CohenSutherlandClipping(Rectangle2D clip) {
|
public CohenSutherlandClipping(Rectangle2D clip) {
|
||||||
@ -42,6 +45,7 @@ public final class CohenSutherlandClipping
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the clip rectangle.
|
* Sets the clip rectangle.
|
||||||
|
*
|
||||||
* @param clip the clip rectangle
|
* @param clip the clip rectangle
|
||||||
*/
|
*/
|
||||||
public void setClip(Rectangle2D clip) {
|
public void setClip(Rectangle2D clip) {
|
||||||
@ -51,19 +55,13 @@ public final class CohenSutherlandClipping
|
|||||||
yMax = yMin + clip.getHeight();
|
yMax = yMin + clip.getHeight();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final int INSIDE = 0;
|
|
||||||
private static final int LEFT = 1;
|
|
||||||
private static final int RIGHT = 2;
|
|
||||||
private static final int BOTTOM = 4;
|
|
||||||
private static final int TOP = 8;
|
|
||||||
|
|
||||||
private final int regionCode(double x, double y) {
|
private final int regionCode(double x, double y) {
|
||||||
int code = x < xMin
|
int code = x < xMin
|
||||||
? LEFT
|
? LEFT
|
||||||
: x > xMax
|
: x > xMax
|
||||||
? RIGHT
|
? RIGHT
|
||||||
: INSIDE;
|
: INSIDE;
|
||||||
if (y < yMin) code |= BOTTOM;
|
if (y < yMin) code |= BOTTOM;
|
||||||
else if (y > yMax) code |= TOP;
|
else if (y > yMax) code |= TOP;
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
@ -71,6 +69,7 @@ public final class CohenSutherlandClipping
|
|||||||
/**
|
/**
|
||||||
* Clips a given line against the clip rectangle.
|
* Clips a given line against the clip rectangle.
|
||||||
* The modification (if needed) is done in place.
|
* The modification (if needed) is done in place.
|
||||||
|
*
|
||||||
* @param line the line to clip
|
* @param line the line to clip
|
||||||
* @return true if line is clipped, false if line is
|
* @return true if line is clipped, false if line is
|
||||||
* totally outside the clip rect.
|
* totally outside the clip rect.
|
||||||
@ -87,9 +86,9 @@ public final class CohenSutherlandClipping
|
|||||||
|
|
||||||
boolean vertical = p1x == p2x;
|
boolean vertical = p1x == p2x;
|
||||||
|
|
||||||
double slope = vertical
|
double slope = vertical
|
||||||
? 0d
|
? 0d
|
||||||
: (p2y-p1y)/(p2x-p1x);
|
: (p2y - p1y) / (p2x - p1x);
|
||||||
|
|
||||||
int c1 = regionCode(p1x, p1y);
|
int c1 = regionCode(p1x, p1y);
|
||||||
int c2 = regionCode(p2x, p2y);
|
int c2 = regionCode(p2x, p2y);
|
||||||
@ -103,31 +102,27 @@ public final class CohenSutherlandClipping
|
|||||||
|
|
||||||
if ((c & LEFT) != INSIDE) {
|
if ((c & LEFT) != INSIDE) {
|
||||||
qx = xMin;
|
qx = xMin;
|
||||||
qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
|
qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
|
||||||
}
|
} else if ((c & RIGHT) != INSIDE) {
|
||||||
else if ((c & RIGHT) != INSIDE) {
|
|
||||||
qx = xMax;
|
qx = xMax;
|
||||||
qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
|
qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
|
||||||
}
|
} else if ((c & BOTTOM) != INSIDE) {
|
||||||
else if ((c & BOTTOM) != INSIDE) {
|
|
||||||
qy = yMin;
|
qy = yMin;
|
||||||
qx = vertical
|
qx = vertical
|
||||||
? p1x
|
? p1x
|
||||||
: (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
|
: (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
|
||||||
}
|
} else if ((c & TOP) != INSIDE) {
|
||||||
else if ((c & TOP) != INSIDE) {
|
|
||||||
qy = yMax;
|
qy = yMax;
|
||||||
qx = vertical
|
qx = vertical
|
||||||
? p1x
|
? p1x
|
||||||
: (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
|
: (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == c1) {
|
if (c == c1) {
|
||||||
p1x = qx;
|
p1x = qx;
|
||||||
p1y = qy;
|
p1y = qy;
|
||||||
c1 = regionCode(p1x, p1y);
|
c1 = regionCode(p1x, p1y);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
p2x = qx;
|
p2x = qx;
|
||||||
p2y = qy;
|
p2y = qy;
|
||||||
c2 = regionCode(p2x, p2y);
|
c2 = regionCode(p2x, p2y);
|
||||||
@ -137,4 +132,4 @@ public final class CohenSutherlandClipping
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// end of file
|
// end of file
|
||||||
|
|||||||
@ -10,11 +10,6 @@ import java.util.List;
|
|||||||
*/
|
*/
|
||||||
public final class QuickSort {
|
public final class QuickSort {
|
||||||
|
|
||||||
private QuickSort() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private static final Comparator<? extends Comparable> OBJCOMP = new Comparator<Comparable>() {
|
private static final Comparator<? extends Comparable> OBJCOMP = new Comparator<Comparable>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(Comparable object1, Comparable object2) {
|
public int compare(Comparable object1, Comparable object2) {
|
||||||
@ -24,6 +19,10 @@ public final class QuickSort {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
private QuickSort() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts the given list using the given comparator.
|
* Sorts the given list using the given comparator.
|
||||||
*
|
*
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
|
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public class Utils {
|
public class Utils {
|
||||||
|
|||||||
@ -1,15 +1,5 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||||
|
|
||||||
import java.awt.Color;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||||
@ -17,9 +7,17 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.awt.Color;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@ -34,7 +32,7 @@ public class PdfVisualisationService {
|
|||||||
PDPage pdPage = document.getPage(page - 1);
|
PDPage pdPage = document.getPage(page - 1);
|
||||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||||
|
|
||||||
for(Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||||
|
|
||||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||||
|
|
||||||
@ -44,10 +42,10 @@ public class PdfVisualisationService {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (textBlock instanceof TextBlock) {
|
if (textBlock instanceof TextBlock) {
|
||||||
textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
|
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||||
} else if (textBlock instanceof Table) {
|
} else if (textBlock instanceof Table) {
|
||||||
textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
|
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||||
visualizeTable((Table) textBlock, contentStream);
|
visualizeTable((Table) textBlock, contentStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,7 +57,6 @@ public class PdfVisualisationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
|
public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
|
||||||
|
|
||||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||||
|
|||||||
@ -1,4 +1,11 @@
|
|||||||
server:
|
server:
|
||||||
port: 8083
|
port: 8083
|
||||||
|
|
||||||
configuration-service.url: "http://localhost:8081"
|
configuration-service.url: "http://localhost:8081"
|
||||||
|
|
||||||
|
|
||||||
|
storage:
|
||||||
|
bucket-name: 'redaction'
|
||||||
|
endpoint: 'http://localhost:9000'
|
||||||
|
key: minioadmin
|
||||||
|
secret: minioadmin
|
||||||
|
|||||||
@ -17,4 +17,11 @@ management:
|
|||||||
prometheus.enabled: ${monitoring.enabled:false}
|
prometheus.enabled: ${monitoring.enabled:false}
|
||||||
health.enabled: true
|
health.enabled: true
|
||||||
endpoints.web.exposure.include: prometheus, health
|
endpoints.web.exposure.include: prometheus, health
|
||||||
metrics.export.prometheus.enabled: ${monitoring.enabled:false}
|
metrics.export.prometheus.enabled: ${monitoring.enabled:false}
|
||||||
|
|
||||||
|
|
||||||
|
storage:
|
||||||
|
signer-type: 'AWSS3V4SignerType'
|
||||||
|
bucket-name: 'redaction'
|
||||||
|
region: 'us-east-1'
|
||||||
|
endpoint: 'https://s3.amazonaws.com'
|
||||||
|
|||||||
@ -0,0 +1,34 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server;
|
||||||
|
|
||||||
|
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
|
||||||
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
|
import org.springframework.core.io.InputStreamResource;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class InMemoryStorageService extends StorageService {
|
||||||
|
|
||||||
|
private Map<String, byte[]> dataMap = new HashMap<>();
|
||||||
|
|
||||||
|
public InMemoryStorageService() {
|
||||||
|
super(null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InputStreamResource getObject(String objectId) {
|
||||||
|
|
||||||
|
var res = dataMap.get(objectId);
|
||||||
|
if (res == null) {
|
||||||
|
throw new StorageObjectDoesNotExist(new RuntimeException());
|
||||||
|
}
|
||||||
|
return new InputStreamResource(new ByteArrayInputStream(res));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void storeObject(String objectId, byte[] data) {
|
||||||
|
dataMap.put(objectId, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,28 +1,20 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server;
|
package com.iqser.red.service.redaction.v1.server;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
import static org.mockito.Mockito.when;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
|
import com.iqser.red.service.configuration.v1.api.model.*;
|
||||||
|
import com.iqser.red.service.file.management.v1.api.model.FileType;
|
||||||
import java.io.BufferedReader;
|
import com.iqser.red.service.redaction.v1.model.*;
|
||||||
import java.io.ByteArrayInputStream;
|
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||||
import java.io.File;
|
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||||
import java.io.FileInputStream;
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||||
import java.io.FileOutputStream;
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
import java.io.IOException;
|
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||||
import java.io.InputStream;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||||
import java.io.InputStreamReader;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||||
import java.net.URL;
|
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||||
import java.nio.charset.StandardCharsets;
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
import java.time.OffsetDateTime;
|
import lombok.SneakyThrows;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.UUID;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@ -37,40 +29,20 @@ import org.springframework.boot.test.context.SpringBootTest;
|
|||||||
import org.springframework.boot.test.context.TestConfiguration;
|
import org.springframework.boot.test.context.TestConfiguration;
|
||||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Primary;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit4.SpringRunner;
|
import org.springframework.test.context.junit4.SpringRunner;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.io.*;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
import java.net.URL;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
import java.nio.charset.StandardCharsets;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
import java.time.OffsetDateTime;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
import java.util.*;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
import java.util.stream.Collectors;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
|
import static org.mockito.Mockito.when;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
|
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
|
||||||
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Comment;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.IdRemoval;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Point;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.SectionText;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.Status;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
|
||||||
|
|
||||||
@RunWith(SpringRunner.class)
|
@RunWith(SpringRunner.class)
|
||||||
@SpringBootTest(webEnvironment = RANDOM_PORT)
|
@SpringBootTest(webEnvironment = RANDOM_PORT)
|
||||||
@ -116,6 +88,15 @@ public class RedactionIntegrationTest {
|
|||||||
@MockBean
|
@MockBean
|
||||||
private ImageClassificationClient imageClassificationClient;
|
private ImageClassificationClient imageClassificationClient;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private RedactionStorageService redactionStorageService;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private StorageService storageService;
|
||||||
|
|
||||||
|
@MockBean
|
||||||
|
private AmazonS3 amazonS3;
|
||||||
|
|
||||||
private final Map<String, List<String>> dictionary = new HashMap<>();
|
private final Map<String, List<String>> dictionary = new HashMap<>();
|
||||||
private final Map<String, String> typeColorMap = new HashMap<>();
|
private final Map<String, String> typeColorMap = new HashMap<>();
|
||||||
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
||||||
@ -126,6 +107,8 @@ public class RedactionIntegrationTest {
|
|||||||
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
private final Map<String, Long> reanlysisVersions = new HashMap<>();
|
||||||
|
|
||||||
private final static String TEST_RULESET_ID = "123";
|
private final static String TEST_RULESET_ID = "123";
|
||||||
|
private final static String TEST_PROJECT_ID = "123";
|
||||||
|
private final static String TEST_FILE_ID = "123";
|
||||||
|
|
||||||
@TestConfiguration
|
@TestConfiguration
|
||||||
public static class RedactionIntegrationTestConfiguration {
|
public static class RedactionIntegrationTestConfiguration {
|
||||||
@ -146,6 +129,12 @@ public class RedactionIntegrationTest {
|
|||||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Primary
|
||||||
|
public StorageService inmemoryStorage() {
|
||||||
|
return new InMemoryStorageService();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -464,15 +453,16 @@ public class RedactionIntegrationTest {
|
|||||||
input.addAll(getPathsRecursively(file));
|
input.addAll(getPathsRecursively(file));
|
||||||
}
|
}
|
||||||
for (File path : input) {
|
for (File path : input) {
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
|
||||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
|
||||||
.build();
|
|
||||||
System.out.println("Redacting file : " + path.getName());
|
System.out.println("Redacting file : " + path.getName());
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
|
||||||
|
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
|
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||||
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -484,13 +474,7 @@ public class RedactionIntegrationTest {
|
|||||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
||||||
|
|
||||||
long rstart = System.currentTimeMillis();
|
long rstart = System.currentTimeMillis();
|
||||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
redactionController.reanalyze(request);
|
||||||
.redactionLog(result.getRedactionLog())
|
|
||||||
.document(IOUtils.toByteArray(new FileInputStream(path)))
|
|
||||||
.manualRedactions(null)
|
|
||||||
.text(result.getText())
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
long rend = System.currentTimeMillis();
|
long rend = System.currentTimeMillis();
|
||||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||||
@ -528,15 +512,14 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("redactionTest");
|
System.out.println("redactionTest");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
||||||
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||||
|
var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
|
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||||
if (entry.isImage()) {
|
if (entry.isImage()) {
|
||||||
System.out.println("---->" + entry.getType());
|
System.out.println("---->" + entry.getType());
|
||||||
}
|
}
|
||||||
@ -547,13 +530,13 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("first analysis duration: " + (end - start));
|
System.out.println("first analysis duration: " + (end - start));
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
|
||||||
fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
|
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID)));
|
||||||
}
|
}
|
||||||
|
|
||||||
int correctFound = 0;
|
int correctFound = 0;
|
||||||
loop:
|
loop:
|
||||||
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
|
for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
|
||||||
for (SectionText sectionText : result.getText().getSectionTexts()) {
|
for (SectionText sectionText : text.getSectionTexts()) {
|
||||||
if (redactionLogEntry.isImage()) {
|
if (redactionLogEntry.isImage()) {
|
||||||
correctFound++;
|
correctFound++;
|
||||||
continue loop;
|
continue loop;
|
||||||
@ -569,7 +552,7 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
|
assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
|
||||||
|
|
||||||
dictionary.get(AUTHOR).add("properties");
|
dictionary.get(AUTHOR).add("properties");
|
||||||
reanlysisVersions.put("properties", 1L);
|
reanlysisVersions.put("properties", 1L);
|
||||||
@ -585,20 +568,14 @@ public class RedactionIntegrationTest {
|
|||||||
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
|
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
|
||||||
|
|
||||||
start = System.currentTimeMillis();
|
start = System.currentTimeMillis();
|
||||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
AnalyzeResult reanalyzeResult = redactionController.reanalyze(request);
|
||||||
.redactionLog(result.getRedactionLog())
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.text(result.getText())
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("reanalysis analysis duration: " + (end - start));
|
System.out.println("reanalysis analysis duration: " + (end - start));
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.projectId(TEST_PROJECT_ID)
|
||||||
.redactionLog(reanalyzeResult.getRedactionLog())
|
.fileId(TEST_FILE_ID)
|
||||||
.sectionGrid(result.getSectionGrid())
|
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||||
@ -613,19 +590,13 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("testTableRedaction");
|
System.out.println("testTableRedaction");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
|
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.projectId(TEST_PROJECT_ID)
|
||||||
.redactionLog(result.getRedactionLog())
|
.fileId(TEST_FILE_ID)
|
||||||
.sectionGrid(result.getSectionGrid())
|
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||||
@ -680,12 +651,9 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.manualRedactions(manualRedactions)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
|
request.setManualRedactions(manualRedactions);
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
|
||||||
@ -694,20 +662,15 @@ public class RedactionIntegrationTest {
|
|||||||
.status(Status.APPROVED)
|
.status(Status.APPROVED)
|
||||||
.build()));
|
.build()));
|
||||||
|
|
||||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
redactionController.reanalyze(request);
|
||||||
.redactionLog(result.getRedactionLog())
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.manualRedactions(manualRedactions)
|
|
||||||
.text(result.getText())
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.projectId(TEST_PROJECT_ID)
|
||||||
.redactionLog(reanalyzeResult.getRedactionLog())
|
.fileId(TEST_FILE_ID)
|
||||||
.sectionGrid(result.getSectionGrid())
|
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||||
fileOutputStream.write(annotateResponse.getDocument());
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
}
|
}
|
||||||
@ -724,11 +687,16 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("classificationTest");
|
System.out.println("classificationTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
|
||||||
|
|
||||||
RedactionRequest request = RedactionRequest.builder()
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
|
|
||||||
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
|
.projectId(request.getProjectId())
|
||||||
|
.fileId(request.getFileId())
|
||||||
|
.ruleSetId(request.getRuleSetId())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
RedactionResult result = redactionController.classify(request);
|
RedactionResult result = redactionController.classify(redactionRequest);
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
|
||||||
fileOutputStream.write(result.getDocument());
|
fileOutputStream.write(result.getDocument());
|
||||||
@ -742,11 +710,15 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("sectionsTest");
|
System.out.println("sectionsTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||||
|
|
||||||
RedactionRequest request = RedactionRequest.builder()
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
|
.projectId(request.getProjectId())
|
||||||
|
.fileId(request.getFileId())
|
||||||
|
.ruleSetId(request.getRuleSetId())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
RedactionResult result = redactionController.sections(request);
|
RedactionResult result = redactionController.sections(redactionRequest);
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
|
||||||
fileOutputStream.write(result.getDocument());
|
fileOutputStream.write(result.getDocument());
|
||||||
@ -760,11 +732,15 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("htmlTablesTest");
|
System.out.println("htmlTablesTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||||
|
|
||||||
RedactionRequest request = RedactionRequest.builder()
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
|
.projectId(request.getProjectId())
|
||||||
|
.fileId(request.getFileId())
|
||||||
|
.ruleSetId(request.getRuleSetId())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
RedactionResult result = redactionController.htmlTables(request);
|
RedactionResult result = redactionController.htmlTables(redactionRequest);
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
||||||
fileOutputStream.write(result.getDocument());
|
fileOutputStream.write(result.getDocument());
|
||||||
@ -778,11 +754,15 @@ public class RedactionIntegrationTest {
|
|||||||
System.out.println("htmlTableRotationTest");
|
System.out.println("htmlTableRotationTest");
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||||
|
|
||||||
RedactionRequest request = RedactionRequest.builder()
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
|
RedactionRequest redactionRequest = RedactionRequest.builder()
|
||||||
|
.projectId(request.getProjectId())
|
||||||
|
.fileId(request.getFileId())
|
||||||
|
.ruleSetId(request.getRuleSetId())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
RedactionResult result = redactionController.htmlTables(request);
|
RedactionResult result = redactionController.htmlTables(redactionRequest);
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
||||||
fileOutputStream.write(result.getDocument());
|
fileOutputStream.write(result.getDocument());
|
||||||
@ -795,20 +775,45 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||||
|
|
||||||
|
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||||
if (!entry.isHint()) {
|
if (!entry.isHint()) {
|
||||||
assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study");
|
assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
private AnalyzeRequest prepareStorage(String file) {
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||||
|
|
||||||
|
return prepareStorage(pdfFileResource.getInputStream());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
private AnalyzeRequest prepareStorage(InputStream stream) {
|
||||||
|
|
||||||
|
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||||
|
.ruleSetId(TEST_RULESET_ID)
|
||||||
|
.projectId(TEST_PROJECT_ID)
|
||||||
|
.fileId(TEST_FILE_ID)
|
||||||
|
.lastProcessed(OffsetDateTime.now())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
var bytes = IOUtils.toByteArray(stream);
|
||||||
|
|
||||||
|
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
|
||||||
|
|
||||||
|
return request;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void sponsorCompanyTest() throws IOException {
|
public void sponsorCompanyTest() throws IOException {
|
||||||
@ -816,17 +821,14 @@ public class RedactionIntegrationTest {
|
|||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.projectId(TEST_PROJECT_ID)
|
||||||
.redactionLog(result.getRedactionLog())
|
.fileId(TEST_FILE_ID)
|
||||||
.sectionGrid(result.getSectionGrid())
|
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
||||||
@ -857,4 +859,4 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,12 +1,8 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.Colors;
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
|
import com.iqser.red.service.configuration.v1.api.model.*;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
|
import com.iqser.red.service.redaction.v1.server.InMemoryStorageService;
|
||||||
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
|
|
||||||
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
|
|
||||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
@ -14,7 +10,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
|||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||||
import org.apache.commons.io.IOUtils;
|
import com.iqser.red.storage.commons.service.StorageService;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Ignore;
|
import org.junit.Ignore;
|
||||||
@ -30,6 +26,7 @@ import org.springframework.boot.test.context.SpringBootTest;
|
|||||||
import org.springframework.boot.test.context.TestConfiguration;
|
import org.springframework.boot.test.context.TestConfiguration;
|
||||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Primary;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit4.SpringRunner;
|
import org.springframework.test.context.junit4.SpringRunner;
|
||||||
|
|
||||||
@ -40,15 +37,8 @@ import java.io.InputStream;
|
|||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
@ -80,6 +70,9 @@ public class EntityRedactionServiceTest {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private DroolsExecutionService droolsExecutionService;
|
private DroolsExecutionService droolsExecutionService;
|
||||||
|
|
||||||
|
@MockBean
|
||||||
|
private AmazonS3 amazonS3;
|
||||||
|
|
||||||
private final static String TEST_RULESET_ID = "123";
|
private final static String TEST_RULESET_ID = "123";
|
||||||
|
|
||||||
@TestConfiguration
|
@TestConfiguration
|
||||||
@ -101,6 +94,13 @@ public class EntityRedactionServiceTest {
|
|||||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Primary
|
||||||
|
public StorageService inmemoryStorage() {
|
||||||
|
return new InMemoryStorageService();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -125,10 +125,6 @@ public class EntityRedactionServiceTest {
|
|||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||||
.build();
|
.build();
|
||||||
@ -144,7 +140,7 @@ public class EntityRedactionServiceTest {
|
|||||||
.build();
|
.build();
|
||||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||||
@ -158,10 +154,6 @@ public class EntityRedactionServiceTest {
|
|||||||
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
|
||||||
|
|
||||||
RedactionRequest redactionRequest = RedactionRequest.builder()
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
|
||||||
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
|
||||||
.build();
|
.build();
|
||||||
@ -176,7 +168,7 @@ public class EntityRedactionServiceTest {
|
|||||||
.build();
|
.build();
|
||||||
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
|
||||||
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
|
||||||
@ -526,4 +518,4 @@ public class EntityRedactionServiceTest {
|
|||||||
return dictionaryEntries;
|
return dictionaryEntries;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,16 +1,14 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import java.io.ByteArrayOutputStream;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||||
import java.io.FileOutputStream;
|
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||||
import java.io.IOException;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import java.util.Collections;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import java.util.List;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
import java.util.stream.Collectors;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||||
import javax.imageio.ImageIO;
|
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.junit.Ignore;
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@ -22,15 +20,15 @@ import org.springframework.boot.test.mock.mockito.MockBean;
|
|||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
import org.springframework.test.context.junit4.SpringRunner;
|
import org.springframework.test.context.junit4.SpringRunner;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import javax.imageio.ImageIO;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
import java.io.ByteArrayOutputStream;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
import java.io.FileOutputStream;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
import java.io.IOException;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
import java.util.Collections;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import java.util.List;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import java.util.stream.Collectors;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
@SpringBootTest
|
@SpringBootTest
|
||||||
@RunWith(SpringRunner.class)
|
@RunWith(SpringRunner.class)
|
||||||
@ -51,6 +49,8 @@ public class PdfSegmentationServiceTest {
|
|||||||
@MockBean
|
@MockBean
|
||||||
private KieContainer kieContainer;
|
private KieContainer kieContainer;
|
||||||
|
|
||||||
|
@MockBean
|
||||||
|
private AmazonS3 amazonS3;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Ignore
|
@Ignore
|
||||||
@ -76,6 +76,29 @@ public class PdfSegmentationServiceTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPDFSegmentationWithComplexTable() throws IOException {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
|
||||||
|
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||||
|
Document document = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
|
assertThat(document.getParagraphs()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
|
.collect(Collectors.toList())).isNotEmpty();
|
||||||
|
Table table = document.getParagraphs()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0);
|
||||||
|
assertThat(table.getColCount()).isEqualTo(6);
|
||||||
|
assertThat(table.getRowCount()).isEqualTo(13);
|
||||||
|
assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTableExtraction() throws IOException {
|
public void testTableExtraction() throws IOException {
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user