From adba0f99a03d15accb0d1ae9cd278b451ab6ebae Mon Sep 17 00:00:00 2001 From: aoezyetimoglu Date: Thu, 30 Sep 2021 11:04:17 +0200 Subject: [PATCH 1/2] RED-2272: Make sure publicly writable directories are used safely && regex upper bound --- .../redaction/v1/server/redaction/utils/Patterns.java | 2 +- .../redaction/utils/TextNormalizationUtilities.java | 2 +- .../v1/server/segmentation/PdfSegmentationService.java | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java index 76a6e52f..2c02bbc3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -11,7 +11,7 @@ public class Patterns { public static Map patternCache = new HashMap<>(); - public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2} )+"); + public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} )+"); public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java index 
342f6b03..6007444c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/TextNormalizationUtilities.java @@ -12,7 +12,7 @@ public class TextNormalizationUtilities { * @return Text without line-break hyphenation. */ public static String removeHyphenLineBreaks(String text) { - return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R", "$1"); + return text.replaceAll("([^\\s\\d\\-]{2,500})[\\-\\u00AD]\\R", "$1"); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index fccbff42..fc5037d6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -4,8 +4,13 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.List; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.pdfbox.io.MemoryUsageSetting; @@ -59,7 +64,8 @@ public class PdfSegmentationService { PDDocument pdDocument = null; try { //create tempFile - File tempFile = File.createTempFile("document", ".pdf"); + FileAttribute<Set<PosixFilePermission>> attr = 
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")); + File tempFile = Files.createTempFile("document", ".pdf").toFile(); try (var fos = new FileOutputStream(tempFile)) { IOUtils.copy(documentInputStream, fos); From c4e47a48f8973ca1bbd602727a205647deb809fe Mon Sep 17 00:00:00 2001 From: aoezyetimoglu Date: Thu, 30 Sep 2021 11:54:09 +0200 Subject: [PATCH 2/2] RED-2272: Make sure publicly writable directories are used safely && regex upper bound --- .../service/redaction/v1/server/redaction/utils/Patterns.java | 2 +- .../v1/server/segmentation/PdfSegmentationService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java index 2c02bbc3..76a6e52f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/Patterns.java @@ -11,7 +11,7 @@ public class Patterns { public static Map patternCache = new HashMap<>(); - public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,500}( ?[A-ZÄÖÜ]{1,2} )+"); + public static Pattern AUTHOR_TABLE_SPITTER = Pattern.compile("((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2}\\.)+|((((di)|(van)) )|[A-Z]’)?[A-ZÄÖÜ][\\wäöüéèê]{2,}( ?[A-ZÄÖÜ]{1,2} )+"); public Pattern getCompiledPattern(String pattern, boolean caseInsensitive) { diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index fc5037d6..475a45d7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -65,7 +65,7 @@ public class PdfSegmentationService { try { //create tempFile FileAttribute<Set<PosixFilePermission>> attr = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")); - File tempFile = Files.createTempFile("document", ".pdf").toFile(); + File tempFile = Files.createTempFile("document", ".pdf", attr).toFile(); try (var fos = new FileOutputStream(tempFile)) { IOUtils.copy(documentInputStream, fos);