diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java index 76a6e52f..2c02bbc3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -11,7 +11,7 @@ public class Patterns { public static Map patternCache = new HashMap<>(); - public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2} )+"); + public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} )+"); public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java index 342f6b03..6007444c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java @@ -12,7 +12,7 @@ public class TextNormalizationUtilities { * @return Text without line-break hyphenation. */ public static String removeHyphenLineBreaks(String text) { - return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R", "$1"); + return text.replaceAll("([^\\s\\d\\-]{2,500})[\\-\\u00AD]\\R", "$1"); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index fccbff42..fc5037d6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -4,8 +4,13 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.List; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.pdfbox.io.MemoryUsageSetting; @@ -59,7 +64,8 @@ public class PdfSegmentationService { PDDocument pdDocument = null; try { //create tempFile - File tempFile = File.createTempFile("document", ".pdf"); + FileAttribute> attr = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")); + File tempFile = Files.createTempFile("document", ".pdf").toFile(); try (var fos = new FileOutputStream(tempFile)) { IOUtils.copy(documentInputStream, fos);