RED-3800 switched jackson for dsl-json for internal non-rest related operations

This commit is contained in:
Timo Bejan 2022-05-09 22:00:01 +03:00
parent df2fcd2e5e
commit 8a44d6299c
50 changed files with 313 additions and 328 deletions

View File

@ -23,20 +23,24 @@
<properties>
<pdfbox.version>2.0.24</pdfbox.version>
<dsljson.version>1.9.9</dsljson.version>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.dslplatform</groupId>
<artifactId>dsl-json-java8</artifactId>
<version>${dsljson.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>1.11.0</version>
<version>1.13.0</version>
<scope>import</scope>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
@ -47,9 +51,7 @@
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
<build>
@ -58,12 +60,10 @@
<plugin>
<groupId>org.sonarsource.scanner.maven</groupId>
<artifactId>sonar-maven-plugin</artifactId>
<version>3.9.0.2155</version>
</plugin>
<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<version>6.3.1</version>
<configuration>
<format>ALL</format>
</configuration>
@ -88,27 +88,5 @@
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.8</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report-aggregate</goal>
</goals>
<phase>verify</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -16,11 +16,20 @@
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/com.dslplatform/dsl-json-java8 -->
<dependency>
<groupId>com.dslplatform</groupId>
<artifactId>dsl-json-java8</artifactId>
<version>${dsljson.version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-api-v1</artifactId>
@ -32,5 +41,21 @@
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessors>
<annotationProcessor>lombok.launch.AnnotationProcessorHider$AnnotationProcessor</annotationProcessor>
<annotationProcessor>com.dslplatform.json.processor.CompiledJsonAnnotationProcessor</annotationProcessor>
</annotationProcessors>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -22,7 +22,7 @@ public class ManualChange {
private OffsetDateTime processedDate;
private OffsetDateTime requestedDate;
private String userId;
private Map<String, Object> propertyChanges = new HashMap<>();
private Map<String, String> propertyChanges = new HashMap<>();
public boolean isProcessed() {
return processedDate != null;
@ -42,7 +42,7 @@ public class ManualChange {
return this;
}
public ManualChange withChange(String property, Object value) {
public ManualChange withChange(String property, String value) {
this.propertyChanges.put(property, value);
return this;
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

View File

@ -1,14 +1,19 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.List;
@Data
@CompiledJson
@AllArgsConstructor
@NoArgsConstructor
public class RedactionLog {
@ -23,8 +28,8 @@ public class RedactionLog {
*/
private int analysisNumber;
private List<RedactionLogEntry> redactionLogEntry;
private List<LegalBasis> legalBasis;
private List<RedactionLogEntry> redactionLogEntry = new ArrayList<>();
private List<RedactionLogLegalBasis> legalBasis = new ArrayList<>();
private long dictionaryVersion = -1;
private long dossierDictionaryVersion = -1;

View File

@ -0,0 +1,22 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
/**
 * Comment data carried inside the redaction log; used as the element type of
 * {@code RedactionLogEntry.comments}.
 *
 * <p>Plain Lombok data class: {@code @Data} generates accessors/equals/hashCode/toString,
 * and both a no-args and an all-args constructor are generated. The all-args constructor
 * takes its parameters in field declaration order, so reordering the fields below changes
 * that constructor's signature.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class RedactionLogComment {
// Identifier of the comment.
private long id;
// NOTE(review): presumably the id/name of the authoring user - confirm against the producer.
private String user;
// Comment text.
private String text;
// Id of the annotation this comment refers to.
private String annotationId;
// Id of the file this comment refers to.
private String fileId;
// Timestamp of the comment.
private OffsetDateTime date;
// NOTE(review): presumably null while the comment is not soft-deleted - confirm.
private OffsetDateTime softDeletedTime;
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.Comment;
import lombok.*;
@ -41,7 +42,7 @@ public class RedactionLogEntry {
private String textAfter;
@Builder.Default
private List<Comment> comments = new ArrayList<>();
private List<RedactionLogComment> comments = new ArrayList<>();
private int startOffset;
private int endOffset;

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Legal-basis record carried inside the redaction log; used as the element type of
 * {@code RedactionLog.legalBasis}.
 *
 * <p>Plain Lombok data class with a no-args and an all-args constructor. The all-args
 * constructor takes (name, description, reason) in field declaration order, so the
 * field order below is part of the class's API.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class RedactionLogLegalBasis {
// Name of the legal basis.
private String name;
// Description of the legal basis.
private String description;
// Reason associated with the legal basis.
private String reason;
}

View File

@ -1,29 +1,17 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import com.dslplatform.json.CompiledJson;
import lombok.*;
@Data
@RequiredArgsConstructor
@NoArgsConstructor
@AllArgsConstructor
public class SectionArea {
@NonNull
private Point topLeft;
@NonNull
private float width;
@NonNull
private float height;
@NonNull
private int page;
private String header;
public boolean contains(Rectangle other) {

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -8,6 +9,7 @@ import lombok.RequiredArgsConstructor;
import java.util.*;
@Data
@CompiledJson
@AllArgsConstructor
@NoArgsConstructor
public class SectionGrid {
@ -17,13 +19,14 @@ public class SectionGrid {
private List<SectionGridSection> sections = new ArrayList<>();
@Data
@RequiredArgsConstructor
@NoArgsConstructor
@AllArgsConstructor
public static class SectionGridSection {
private final int sectionNumber;
private final String headline;
private final Set<Integer> pages;
private final List<SectionArea> sectionAreas;
private int sectionNumber;
private String headline;
private Set<Integer> pages;
private List<SectionArea> sectionAreas;
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -11,22 +12,12 @@ import java.util.List;
@Data
@AllArgsConstructor
@NoArgsConstructor
@RequiredArgsConstructor
public class SectionRectangle {
@NonNull
private Point topLeft;
@NonNull
private float width;
@NonNull
private float height;
@NonNull
private int part;
@NonNull
private int numberOfParts;
private List<CellRectangle> tableCells;

View File

@ -12,11 +12,12 @@
<artifactId>redaction-service-server-v1</artifactId>
<properties>
<drools.version>7.59.0.Final</drools.version>
<kie.version>7.59.0.Final</kie.version>
<locationtech.version>1.16.1</locationtech.version>
<pdfbox.jbig2-imageio.version>3.0.3</pdfbox.jbig2-imageio.version>
<jai-imageio.version>1.4.0</jai-imageio.version>
<drools.version>7.68.0.Final</drools.version>
<kie.version>7.68.0.Final</kie.version>
<locationtech.version>1.18.2</locationtech.version>
<javaassist.version>3.28.0-GA</javaassist.version>
<ahocorasick.version>0.6.3</ahocorasick.version>
<jackson.version>2.13.2</jackson.version>
</properties>
<dependencies>
@ -26,15 +27,21 @@
</dependency>
<dependency>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>0.6.3</version>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-afterburner</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
<version>0.10</version>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>${ahocorasick.version}</version>
</dependency>
<dependency>
<groupId>org.javassist</groupId>
<artifactId>javassist</artifactId>
<version>${javaassist.version}</version>
</dependency>
<dependency>
@ -62,22 +69,6 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jbig2-imageio</artifactId>
<version>${pdfbox.jbig2-imageio.version}</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>${jai-imageio.version}</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>${jai-imageio.version}</version>
</dependency>
<!-- commons -->
<dependency>
<groupId>com.iqser.red.commons</groupId>
@ -126,6 +117,18 @@
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessors>
<annotationProcessor>lombok.launch.AnnotationProcessorHider$AnnotationProcessor</annotationProcessor>
<annotationProcessor>com.dslplatform.json.processor.CompiledJsonAnnotationProcessor</annotationProcessor>
</annotationProcessors>
</configuration>
</plugin>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
@ -14,6 +15,7 @@ public class Footer {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {
SearchableText searchableText = new SearchableText();

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
@ -14,6 +15,7 @@ public class Header {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {
SearchableText searchableText = new SearchableText();

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
@ -14,6 +16,7 @@ import java.util.*;
@Data
@Builder
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class SectionText {
@ -38,6 +41,7 @@ public class SectionText {
}
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {
SearchableText searchableText = new SearchableText();

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -8,6 +9,7 @@ import java.util.ArrayList;
import java.util.List;
@Data
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class Text {

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@ -15,6 +17,7 @@ import java.util.List;
@AllArgsConstructor
@Builder
@Data
@CompiledJson
@NoArgsConstructor
public class TextBlock extends AbstractTextContainer {
@ -121,6 +124,7 @@ public class TextBlock extends AbstractTextContainer {
@Override
@JsonIgnore
@JsonAttribute(ignore = true)
public String getText() {
StringBuilder sb = new StringBuilder();

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
@ -14,6 +15,7 @@ public class UnclassifiedText {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {
SearchableText searchableText = new SearchableText();

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -7,6 +8,7 @@ import lombok.NoArgsConstructor;
@Data
@Builder
@CompiledJson
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecogintionEntity {

View File

@ -4,18 +4,18 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@AllArgsConstructor
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class NerEntities {
@Builder.Default
private Map<Integer, List<EntityRecogintionEntity>> data = new HashMap<>();
}

View File

@ -1,52 +0,0 @@
package com.iqser.red.service.redaction.v1.server.memory;
import lombok.extern.slf4j.Slf4j;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
/**
 * Diagnostic helper that logs a snapshot of the JVM memory figures reported by
 * {@link Runtime}.
 */
@Slf4j
public class MemoryStats {

    /**
     * Logs used, free, total and max memory of the current JVM at INFO level.
     * Each figure is rendered with {@link #humanReadableByteCountBin(long)}.
     */
    public static void printMemoryStats() {
        log.info("\n\n ------------------------------ \n" +
                " Used Memory: " + humanReadableByteCountBin(getUsedMemory()) + "\n" +
                " Free Memory: " + humanReadableByteCountBin(getFreeMemory()) + "\n" +
                " Total Memory: " + humanReadableByteCountBin(getTotalMemory()) + "\n" +
                " Max Memory: " + humanReadableByteCountBin(getMaxMemory()) + "\n" +
                "\n ------------------------------ \n");
    }

    /**
     * Formats a byte count using binary (1024-based) units.
     *
     * <p>Magnitudes below 1024 are returned as {@code "<bytes> B"}; larger values are
     * returned with one decimal place and a unit taken from K, M, G, T, P, E followed
     * by {@code "iB"}. The sign of the input is preserved.
     *
     * @param bytes number of bytes, may be negative
     * @return the formatted byte count
     */
    public static String humanReadableByteCountBin(long bytes) {
        // Math.abs(Long.MIN_VALUE) overflows and stays negative, so clamp it explicitly.
        long absB = bytes == Long.MIN_VALUE ? Long.MAX_VALUE : Math.abs(bytes);
        if (absB < 1024) {
            return bytes + " B";
        }
        long value = absB;
        CharacterIterator ci = new StringCharacterIterator("KMGTPE");
        // Each iteration divides by 1024 and advances one unit.
        // NOTE(review): the threshold constant looks chosen so that values which would
        // print as "1024.0" are promoted to the next unit instead - confirm.
        for (int i = 40; i >= 0 && absB > 0xfffccccccccccccL >> i; i -= 10) {
            value >>= 10;
            ci.next();
        }
        value *= Long.signum(bytes);
        return String.format("%.1f %ciB", value / 1024.0, ci.current());
    }

    /** @return the maximum amount of memory the JVM will attempt to use, in bytes */
    private static long getMaxMemory() {
        return Runtime.getRuntime().maxMemory();
    }

    /**
     * @return the memory currently in use, in bytes: the currently allocated total minus
     *         the free part of it. ({@link Runtime#freeMemory()} is relative to
     *         {@link Runtime#totalMemory()}, not to {@link Runtime#maxMemory()}.)
     */
    private static long getUsedMemory() {
        return getTotalMemory() - getFreeMemory();
    }

    /** @return the total amount of memory currently held by the JVM, in bytes */
    private static long getTotalMemory() {
        return Runtime.getRuntime().totalMemory();
    }

    /** @return the amount of free memory within the currently held total, in bytes */
    private static long getFreeMemory() {
        return Runtime.getRuntime().freeMemory();
    }
}

View File

@ -1,52 +1,26 @@
package com.iqser.red.service.redaction.v1.server.parsing;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.*;
import org.apache.pdfbox.contentstream.operator.state.*;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.TextPosition;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@Slf4j
public class PDFLinesTextStripper extends PDFTextStripper {
@ -66,8 +40,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
private int minCharHeight;
@Getter
private int maxCharHeight;
@Getter
private List<PdfImage> images = new ArrayList<>();
private float path_x;
private float path_y;
@ -183,9 +155,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
graphicsPath.clear();
break;
// case OperatorName.DRAW_OBJECT:
// processImageOperation(arguments);
// break;
}
@ -193,32 +162,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
}
/**
 * Extracts the image referenced by a draw-object operation and records it in
 * {@code images} together with its placement on the current page.
 *
 * <p>Only XObjects that are {@link PDImageXObject}s are considered, and only when the
 * placement rectangle derived from the current transformation matrix is larger than
 * 2 in both width and height. Any failure is logged and otherwise ignored so that a
 * single broken image does not abort processing (best effort).
 *
 * @param arguments operator arguments; the first element is expected to be the
 *                  {@link COSName} of the XObject
 */
protected void processImageOperation(List<COSBase> arguments) {
    try {
        COSName objectName = (COSName) arguments.get(0);
        PDXObject xobject = getResources().getXObject(objectName);
        if (!(xobject instanceof PDImageXObject)) {
            return;
        }
        PDImageXObject image = (PDImageXObject) xobject;
        Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
        Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew.getScaleY());

        // Memory hack - the SoftReference image cache kills me
        FieldUtils.writeField(image, "cachedImageSubsampling", -1, true);

        if (rect.getHeight() > 2 && rect.getWidth() > 2) {
            // Fetch the image once and reuse it for both the payload and the alpha check,
            // instead of calling getImage() twice.
            java.awt.image.BufferedImage decoded = image.getImage();
            this.images.add(new PdfImage(decoded, rect, pageNumber, decoded.getColorModel().hasAlpha()));
        }
    } catch (Exception e) {
        // Best effort: keep going, but pass the throwable so the stack trace is not lost.
        log.warn("Problem during image extraction: {}", e.getMessage(), e);
    }
}
private float floatValue(COSBase value) {
if (value instanceof COSNumber) {
@ -375,7 +318,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
minCharHeight = Integer.MAX_VALUE;
maxCharHeight = 0;
textPositionSequences.clear();
images = new ArrayList<>();
rulings.clear();
graphicsPath.clear();
path_x = 0.0f;

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.parsing.model;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -9,6 +11,7 @@ import org.springframework.beans.BeanUtils;
@Data
@NoArgsConstructor
@CompiledJson
public class RedTextPosition {
private String textMatrix;
@ -26,14 +29,17 @@ public class RedTextPosition {
// not used in reanalysis
@JsonIgnore
@JsonAttribute(ignore = true)
private float widthOfSpace;
// not used in reanalysis
@JsonIgnore
@JsonAttribute(ignore = true)
private float fontSizeInPt;
// not used in reanalysis
@JsonIgnore
@JsonAttribute(ignore = true)
private String fontName;

View File

@ -1,22 +1,23 @@
package com.iqser.red.service.redaction.v1.server.parsing.model;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.text.TextPosition;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.text.TextPosition;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Data
@CompiledJson
@NoArgsConstructor
@JsonIgnoreProperties({"empty"})
public class TextPositionSequence implements CharSequence {
@ -27,7 +28,6 @@ public class TextPositionSequence implements CharSequence {
private float x1;
private float x2;
public TextPositionSequence(int page) {
this.page = page;
@ -112,6 +112,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getX1() {
if (textPositions.get(0).getRotation() == 90) {
@ -123,6 +124,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getX2() {
if (textPositions.get(0).getRotation() == 90) {
@ -135,6 +137,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getRotationAdjustedY() {
return textPositions.get(0).getY();
@ -142,6 +145,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getRotationAdjustedX() {
return textPositions.get(0).getXDirAdj();
@ -149,6 +153,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getY1() {
if (textPositions.get(0).getRotation() == 90) {
@ -160,6 +165,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getY2() {
if (textPositions.get(0).getRotation() == 90) {
@ -171,6 +177,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getTextHeight() {
return textPositions.get(0).getHeightDir() + 2;
@ -178,6 +185,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getHeight() {
return getY2() - getY1();
@ -185,6 +193,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getWidth() {
return getX2() - getX1();
@ -192,6 +201,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public String getFont() {
return textPositions.get(0).getFontName().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", "");
@ -199,6 +209,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public String getFontStyle() {
String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase();
@ -217,6 +228,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getFontSize() {
return textPositions.get(0).getFontSizeInPt();
@ -224,6 +236,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public float getSpaceWidth() {
return textPositions.get(0).getWidthOfSpace();
@ -231,6 +244,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public int getRotation() {
return textPositions.get(0).getRotation();
@ -238,6 +252,7 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public Rectangle getRectangle() {
log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, toString(), textPositions.get(0).getRotation(), textPositions.get(0).getDir());
@ -292,11 +307,11 @@ public class TextPositionSequence implements CharSequence {
posXEnd = textPositions.get(textPositions.size() - 1).getYDirAdj() - height;
} else if (textPositions.get(0).getRotation() == 180 && textPositions.get(0).getDir() == 180f) {
posXInit = textPositions.get(0).getPageWidth() - getX1() +1;
posXInit = textPositions.get(0).getPageWidth() - getX1() + 1;
posXEnd = textPositions.get(0).getPageWidth() - getX2() + textPositions.get(0).getWidthDirAdj() - textPositions.get(textPositions.size() - 1)
.getWidthDirAdj() - 3;
posYInit = textPositions.get(0).getYDirAdj() - height + 2;
posYEnd = textPositions.get(textPositions.size() - 1)
posYEnd = textPositions.get(textPositions.size() - 1)
.getYDirAdj() - height + 2;
} else {

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -7,6 +8,7 @@ import lombok.NoArgsConstructor;
@Data
@Builder
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class Image implements ReasonHolder {

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.Data;
import lombok.NonNull;
@ -13,6 +14,7 @@ import java.awt.image.BufferedImage;
public class PdfImage {
@JsonIgnore
@JsonAttribute(ignore = true)
private BufferedImage image;
@NonNull
private RedRectangle2D position;

View File

@ -1,12 +1,15 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class RedRectangle2D {
@ -19,6 +22,7 @@ public class RedRectangle2D {
private double height;
@JsonIgnore
@JsonAttribute(ignore = true)
public boolean isEmpty() {
return width <= 0.0f || height <= 0.0f;
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
@ -9,12 +10,12 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizati
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
public class SearchableText {
@JsonIgnore
@JsonAttribute(ignore = true)
private transient String stringRepresentation;
private final List<TextPositionSequence> sequences = new ArrayList<>();

View File

@ -3,9 +3,11 @@ package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import java.util.HashMap;
import java.util.Map;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class Classification {
private Map<String, Float> probabilities = new HashMap<>();

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class FilterGeometry {
private ImageSize imageSize;

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class Filters {
private FilterGeometry geometry;

View File

@ -1,9 +1,11 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Data holder for a width/height pair.
 *
 * <p>{@code @Data} generates the accessors; {@code @CompiledJson} marks the class for
 * DSL-JSON, whose annotation processor is configured in the module's compiler plugin.
 * NOTE(review): the unit of both values is not visible here - confirm against the producer.
 */
@Data
@CompiledJson
public class Geometry {
// Width value.
private float width;
// Height value.
private float height;
}
}

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class ImageFormat {
private float quotient;

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class ImageMetadata {
private Classification classification;

View File

@ -1,14 +1,16 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
@Data
@CompiledJson
public class ImageServiceResponse {
private String dossierId;
@ -16,6 +18,7 @@ public class ImageServiceResponse {
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
@JsonAttribute(alternativeNames = {"imageMetadata"})
private List<ImageMetadata> data = new ArrayList<>();
}

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class ImageSize {
private float quotient;

View File

@ -1,12 +1,14 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Data holder for a rectangular position (two x and two y coordinates) on a page.
 *
 * <p>{@code @Data} generates the accessors; {@code @CompiledJson} marks the class for
 * DSL-JSON, whose annotation processor is configured in the module's compiler plugin.
 * NOTE(review): coordinate system, origin and whether pageNumber is 0- or 1-based are
 * not visible here - confirm against the producer.
 */
@Data
@CompiledJson
public class Position {
// First x coordinate.
private float x1;
// Second x coordinate.
private float x2;
// First y coordinate.
private float y1;
// Second y coordinate.
private float y2;
// Page the position refers to.
private int pageNumber;
}
}

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@CompiledJson
public class Probability {
private boolean unconfident;

View File

@ -6,6 +6,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.entity
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
@ -75,6 +76,7 @@ public class AnalyzeService {
}
List<SectionText> sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc);
sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount);
Text text = new Text(pageCount, sectionTexts);
@ -131,7 +133,7 @@ public class AnalyzeService {
if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
} else {
nerEntities = NerEntities.builder().build();
nerEntities = new NerEntities();
}
List<SectionText> reanalysisSections = text.getSectionTexts()
@ -177,7 +179,7 @@ public class AnalyzeService {
if (redactionServiceSettings.isNerServiceEnabled()) {
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
} else {
nerEntities = NerEntities.builder().build();
nerEntities = new NerEntities();
}
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
@ -189,7 +191,7 @@ public class AnalyzeService {
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, legalBasis, dictionary.getVersion()
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, convert(legalBasis), dictionary.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
@ -286,6 +288,9 @@ public class AnalyzeService {
.map(ManualForceRedaction::getAnnotationId)))).collect(Collectors.toSet());
}
/**
 * Converts the legal-basis mappings into the representation stored in the redaction log.
 *
 * @param legalBasis legal-basis entries to convert; must not be {@code null}
 * @return one {@link RedactionLogLegalBasis} per input entry, in input order
 */
public List<RedactionLogLegalBasis> convert(List<LegalBasis> legalBasis) {
    return legalBasis.stream()
            .map(this::toRedactionLogLegalBasis)
            .collect(Collectors.toList());
}

/** Copies name, description and reason of a single legal basis into its redaction-log counterpart. */
private RedactionLogLegalBasis toRedactionLogLegalBasis(LegalBasis source) {
    return new RedactionLogLegalBasis(source.getName(), source.getDescription(), source.getReason());
}
public Image convert(RedactionLogEntry entry) {

View File

@ -69,7 +69,7 @@ public class RedactionLogMergeService {
}
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog, SectionGrid sectionGrid, ManualRedactions manualRedactions,
Set<Integer> excludedPages, List<Type> types, Colors colors) {
Set<Integer> excludedPages, List<Type> types, Colors colors) {
var skippedImportedRedactions = new HashSet<>();
@ -93,7 +93,7 @@ public class RedactionLogMergeService {
skippedImportedRedactions.add(entry.getId());
}
entry.setComments(manualRedactions.getComments().get(entry.getId()));
entry.setComments(convert(manualRedactions.getComments().get(entry.getId())));
if (excludedPages != null && !excludedPages.isEmpty()) {
entry.getPositions().forEach(pos -> {
@ -335,7 +335,7 @@ public class RedactionLogMergeService {
if (shouldCreateManualEntry(manualRedactionEntry)) {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, manualRedactionEntry.getAnnotationId(), colors, types);
redactionLogEntry.setPositions(convertPositions(manualRedactionEntry.getPositions()));
redactionLogEntry.setComments(comments.get(manualRedactionEntry.getAnnotationId()));
redactionLogEntry.setComments(convert(comments.get(manualRedactionEntry.getAnnotationId())));
redactionLogEntry.setTextBefore(manualRedactionEntry.getTextBefore());
redactionLogEntry.setTextAfter(manualRedactionEntry.getTextAfter());
@ -348,6 +348,11 @@ public class RedactionLogMergeService {
return redactionLogEntries;
}
/**
 * Converts {@link Comment} objects into the {@link RedactionLogComment} form stored on a
 * redaction-log entry.
 *
 * <p>Both callers pass the result of a per-annotation lookup, which yields {@code null} for
 * annotations that have no comments. Before this conversion step existed that {@code null} was
 * stored on the entry unchanged, and consumers null-check {@code getComments()}, so {@code null}
 * is passed through here instead of failing with a {@link NullPointerException}.
 *
 * @param comments comments of a single annotation, or {@code null} if it has none
 * @return the converted comments in input order, or {@code null} if {@code comments} is {@code null}
 */
private List<RedactionLogComment> convert(List<Comment> comments) {
    if (comments == null) {
        // No comments for this annotation: keep the entry's comments unset, as before.
        return null;
    }
    return comments.stream()
            .map(c -> new RedactionLogComment(c.getId(), c.getUser(), c.getText(), c.getAnnotationId(),
                    c.getFileId(), c.getDate(), c.getSoftDeletedTime()))
            .collect(Collectors.toList());
}
private List<Rectangle> convertPositions(
List<com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle> positions) {

View File

@ -46,7 +46,7 @@ public class SectionGridCreatorService {
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
.getHeight(), i + 1, paragraph.getPageBlocks().size(),null));
} else if (textBlock instanceof Table) {

View File

@ -79,7 +79,7 @@ public class SectionTextBuilderService {
.get(0)
.getSequences()
.get(0)
.getPage());
.getPage(), null);
sectionText.getSectionAreas().add(sectionArea);
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
@ -141,7 +141,7 @@ public class SectionTextBuilderService {
.get(0)
.getSequences()
.get(0)
.getPage());
.getPage(), null);
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
sectionText.getSectionAreas().add(sectionArea);
@ -171,7 +171,7 @@ public class SectionTextBuilderService {
SectionText sectionText = new SectionText();
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
.getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage());
.getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage(), null);
sectionText.getSectionAreas().add(sectionArea);
}

View File

@ -36,7 +36,7 @@ public class ImageService {
ImageServiceResponse imageServiceResponse = objectMapper.readValue(imageClassificationStream, ImageServiceResponse.class);
Map<Integer, List<PdfImage>> images = new HashMap<>();
imageServiceResponse.getData().stream().forEach(imageMetadata -> {
imageServiceResponse.getData().forEach(imageMetadata -> {
var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)) : ImageType.OTHER;
images.computeIfAbsent(imageMetadata.getPosition().getPageNumber() ,x -> new ArrayList<>())
.add(new PdfImage(new RedRectangle2D(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight()), classification, imageMetadata.getPosition().getPageNumber()));

View File

@ -1,5 +1,27 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.SystemUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
@ -13,38 +35,11 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.SystemUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class PdfSegmentationService {
private final static int MAX_PAGES_BEFORE_GC = 250;
private final RulingCleaningService rulingCleaningService;
private final TableExtractionService tableExtractionService;
private final BlockificationService blockificationService;
@ -60,11 +55,10 @@ public class PdfSegmentationService {
//create tempFile
File tempFile;
if(SystemUtils.IS_OS_UNIX) {
if (SystemUtils.IS_OS_UNIX) {
FileAttribute<Set<PosixFilePermission>> attr = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"));
tempFile = Files.createTempFile("document", ".pdf", attr).toFile();
}
else {
} else {
tempFile = Files.createTempFile("document", ".pdf").toFile();
tempFile.setReadable(true, true);
tempFile.setWritable(true, true);
@ -78,15 +72,12 @@ public class PdfSegmentationService {
Document document = new Document();
List<Page> pages = new ArrayList<>();
pdDocument = reinitializePDDocument(tempFile, null);
pdDocument = PDDocument.load(tempFile, MemoryUsageSetting.setupMixed(1024 * 1024 * 64));
pdDocument.setAllSecurityToBeRemoved(true);
long pageCount = pdDocument.getNumberOfPages();
for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
if (pageNumber % MAX_PAGES_BEFORE_GC == 0) {
pdDocument = reinitializePDDocument(tempFile, pdDocument);
}
PDFLinesTextStripper stripper = new PDFLinesTextStripper();
PDPage pdPage = pdDocument.getPage(pageNumber - 1);
stripper.setPageNumber(pageNumber);
@ -107,6 +98,7 @@ public class PdfSegmentationService {
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
.getVertical());
PDRectangle cropbox = pdPage.getCropBox();
float cropboxArea = cropbox.getHeight() * cropbox.getWidth();
page.setCropBoxArea(cropboxArea);
@ -134,11 +126,10 @@ public class PdfSegmentationService {
sectionsBuilderService.buildSections(document);
sectionsBuilderService.addImagesToSections(document);
pdDocument = reinitializePDDocument(tempFile, pdDocument);
IOUtils.close(pdDocument);
if(!tempFile.delete()){
if (!tempFile.delete()) {
log.warn("Could not delete tmp file");
}
@ -152,23 +143,6 @@ public class PdfSegmentationService {
}
/**
 * Closes the given document (if any) and loads a fresh one from the temp file.
 *
 * <p>Between closing and reloading, finalization and a garbage collection are requested and the
 * current memory statistics are printed. The reloaded document is opened with the temp-file-only
 * memory setting and has "remove all security" enabled.
 *
 * @param tempFile   PDF file to load the document from
 * @param pdDocument currently open document to close first, or {@code null} on the initial load
 * @return the newly loaded document
 * @throws IOException if closing the old document or loading the new one fails
 */
private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException {
    // The old handle has to be closed before finalization/GC are requested.
    if (pdDocument != null) {
        pdDocument.close();
    }

    System.runFinalization();
    System.gc();
    MemoryStats.printMemoryStats();

    PDDocument reloaded = PDDocument.load(tempFile, MemoryUsageSetting.setupTempFileOnly());
    reloaded.setAllSecurityToBeRemoved(true);
    return reloaded;
}
private void increaseDocumentStatistics(Page page, Document document) {
if (!page.isLandscape()) {

View File

@ -1,12 +1,7 @@
package com.iqser.red.service.redaction.v1.server.storage;
import java.io.IOException;
import java.io.InputStream;
import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.dslplatform.json.DslJson;
import com.dslplatform.json.runtime.Settings;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.ImportedRedactions;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
@ -16,20 +11,26 @@ import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
@Slf4j
@Service
@RequiredArgsConstructor
public class RedactionStorageService {
private final ObjectMapper objectMapper;
private final StorageService storageService;
private final DslJson<Object> dslJson = new DslJson<>(Settings.basicSetup());
@SneakyThrows
public InputStream getStoredObject(String storageId) {
@ -40,12 +41,13 @@ public class RedactionStorageService {
@SneakyThrows
public void storeObject(String dossierId, String fileId, FileType fileType, Object any) {
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), objectMapper.writeValueAsBytes(any));
var baos = new ByteArrayOutputStream();
dslJson.serialize(any, baos);
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), baos.toByteArray());
}
@SneakyThrows
public void storeObject(String dossierId, String fileId, FileType fileType, InputStream inputStream){
public void storeObject(String dossierId, String fileId, FileType fileType, InputStream inputStream) {
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), inputStream);
}
@ -62,7 +64,7 @@ public class RedactionStorageService {
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), ImportedRedactions.class);
return dslJson.deserialize(ImportedRedactions.class, inputStreamResource.getInputStream());
} catch (IOException e) {
throw new RuntimeException("Could not imported redactions", e);
}
@ -80,7 +82,7 @@ public class RedactionStorageService {
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), RedactionLog.class);
return dslJson.deserialize(RedactionLog.class, inputStreamResource.getInputStream());
} catch (IOException e) {
throw new RuntimeException("Could not convert RedactionLog", e);
}
@ -98,7 +100,7 @@ public class RedactionStorageService {
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), Text.class);
return dslJson.deserialize(Text.class, inputStreamResource.getInputStream());
} catch (IOException e) {
throw new RuntimeException("Could not convert Text", e);
}
@ -115,7 +117,7 @@ public class RedactionStorageService {
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
return dslJson.deserialize(NerEntities.class, inputStreamResource.getInputStream());
} catch (IOException e) {
throw new RuntimeException("Could not convert NER Entities", e);
}
@ -126,7 +128,7 @@ public class RedactionStorageService {
try {
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
return objectMapper.readValue(sectionGrid.getInputStream(), SectionGrid.class);
return dslJson.deserialize(SectionGrid.class, sectionGrid.getInputStream());
} catch (StorageObjectDoesNotExist e) {
throw new NotFoundException("Section Grid is not available.");
} catch (IOException e) {

View File

@ -1,9 +1,9 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -33,11 +33,13 @@ public abstract class AbstractTextContainer {
}
@JsonIgnore
@JsonAttribute(ignore = true)
public float getHeight() {
return maxY - minY;
}
@JsonIgnore
@JsonAttribute(ignore = true)
public float getWidth() {
return maxX - minX;
}

View File

@ -12,6 +12,7 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.*;
import java.util.stream.Collectors;
public class FileSystemBackedStorageService implements StorageService{
@ -51,6 +52,12 @@ public class FileSystemBackedStorageService implements StorageService{
return new ArrayList<>(dataMap.keySet());
}
/**
 * Lists the absolute file-system paths of all files currently held by this storage.
 *
 * @return a new list with one absolute path per stored file
 */
public List<String> listFilePaths() {
    List<String> absolutePaths = new ArrayList<>();
    for (File storedFile : dataMap.values()) {
        absolutePaths.add(storedFile.getAbsolutePath());
    }
    return absolutePaths;
}
@SneakyThrows
@Override
public void storeObject(String objectId, byte[] data) {

View File

@ -21,7 +21,6 @@ import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ManualRedactionSurroundingTextService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
@ -243,7 +242,6 @@ public class RedactionIntegrationTest {
public void test270Rotated() {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
MemoryStats.printMemoryStats();
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();
@ -255,7 +253,6 @@ public class RedactionIntegrationTest {
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
MemoryStats.printMemoryStats();
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();

View File

@ -124,7 +124,7 @@ public class AnnotationService {
annotations.add(annotation);
if (redactionLogEntry.getComments() != null) {
for (Comment comment : redactionLogEntry.getComments()) {
for (RedactionLogComment comment : redactionLogEntry.getComments()) {
PDAnnotationText txtAnnot = new PDAnnotationText();
txtAnnot.setAnnotationName(String.valueOf(comment.getId()));
txtAnnot.setInReplyTo(annotation); // Reference to highlight annotation