RED-5562 improved pattern compile

This commit is contained in:
Timo Bejan 2022-11-15 01:38:33 +02:00
parent 20ab65afd2
commit 623b8df5e6

View File

@ -8,13 +8,14 @@ import java.util.regex.Pattern;
@Slf4j
public class SeparatorUtils {
// Compiled once at class initialisation so it can be reused instead of recompiling the regex per call.
// NOTE: without Pattern.UNICODE_CHARACTER_CLASS, \p{Punct} is the POSIX class and covers ASCII punctuation only.
private final static Pattern punctuationPattern = Pattern.compile("\\p{Punct}");
// Quote characters treated as separators by isSeparator: apostrophe, quotation mark (U+0022),
// guillemets (U+00AB, U+00BB), curly and low-9 single/double quotes (U+2018, U+2019, U+201A,
// U+201C, U+201D, U+201E) and single angle quotes (U+2039, U+203A).
private final static Set<Character> quotes = Set.of('\'', '\u0022', '\u00AB', '\u00BB', '\u2018', '\u2019', '\u201A', '\u201C', '\u201D', '\u201E'
, '\u2039', '\u203A');
// Decimal code points: fullwidth parentheses (U+FF08, U+FF09), fullwidth full stop (U+FF0E),
// fullwidth square brackets (U+FF3B, U+FF3D), fullwidth curly brackets (U+FF5B, U+FF5D),
// fullwidth white parentheses (U+FF5F, U+FF60), katakana-hiragana voiced / semi-voiced sound
// marks (U+309B, U+309C) and fullwidth equals sign (U+FF1D).
// NOTE(review): presumably consulted by isJapaneseSeparator; its body is not visible here - confirm.
private final static Set<Integer> japaneseAltPunctuationMarks = Set.of(65288, 65289, 65294, 65339, 65341, 65371, 65373, 65375, 65376, 12443, 12444, 65309);
/**
 * Reports whether the given character counts as a separator.
 *
 * <p>A character is a separator when any of the following holds:
 * <ul>
 *   <li>{@link Character#isWhitespace(char)} is true for it,</li>
 *   <li>it matches the precompiled {@code \p{Punct}} pattern,</li>
 *   <li>it is contained in the {@code quotes} set, or</li>
 *   <li>{@code isJapaneseSeparator(c)} returns true.</li>
 * </ul>
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} is a separator, {@code false} otherwise
 */
public static boolean isSeparator(char c) {
    // Use the shared precompiled pattern; Pattern.matches(...) would recompile the regex on every call.
    return Character.isWhitespace(c)
            || punctuationPattern.matcher(String.valueOf(c)).matches()
            || quotes.contains(c)
            || isJapaneseSeparator(c);
}
public static boolean isJapaneseSeparator(char c) {