From 623b8df5e6136d2ae6649e5016933abdc40454cc Mon Sep 17 00:00:00 2001 From: Timo Bejan Date: Tue, 15 Nov 2022 01:38:33 +0200 Subject: [PATCH] RED-5562 improved pattern compile --- .../redaction/v1/server/redaction/utils/SeparatorUtils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java index e90f731b..8ae6988f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/SeparatorUtils.java @@ -8,13 +8,14 @@ import java.util.regex.Pattern; @Slf4j public class SeparatorUtils { + private final static Pattern punctuationPattern = Pattern.compile("\\p{Punct}"); private final static Set quotes = Set.of('\'', '\u0022', '\u00AB', '\u00BB', '\u2018', '\u2019', '\u201A', '\u201C', '\u201D', '\u201E' , '\u2039', '\u203A'); private final static Set japaneseAltPunctuationMarks = Set.of(65288, 65289, 65294, 65339, 65341, 65371, 65373, 65375, 65376, 12443, 12444, 65309); public static boolean isSeparator(char c) { - return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || quotes.contains(c) || isJapaneseSeparator(c); + return Character.isWhitespace(c) || punctuationPattern.matcher(String.valueOf(c)).matches() || quotes.contains(c) || isJapaneseSeparator(c); } public static boolean isJapaneseSeparator(char c) {