Pull request #498: RED-5562 Japanese space characters
Merge in RED/redaction-service from RED-5562-mst to master * commit '623b8df5e6136d2ae6649e5016933abdc40454cc': RED-5562 improved pattern compile RED-5562 Japanese space characters - fixed PMD RED-5562 Japanese space characters
This commit is contained in:
commit
91e227248d
@ -1,16 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
/**
 * Static helpers for deciding whether a single character separates words.
 *
 * <p>A character counts as a separator when it is whitespace, ASCII punctuation, one of the
 * listed quotation marks, or one of the Japanese (CJK / full-width) punctuation characters
 * recognised by {@link #isJapaneseSeparator(char)}.
 */
public class SeparatorUtils {

    /** Matches exactly one character of the POSIX {@code \p{Punct}} class; compiled once and reused. */
    private static final Pattern PUNCTUATION_PATTERN = Pattern.compile("\\p{Punct}");

    /** Straight, angle, and typographic quotation marks that are treated as separators. */
    private static final Set<Character> QUOTES = Set.of(
            '\'',      // apostrophe
            '\u0022',  // quotation mark
            '\u00AB',  // left-pointing double angle quotation mark
            '\u00BB',  // right-pointing double angle quotation mark
            '\u2018',  // left single quotation mark
            '\u2019',  // right single quotation mark
            '\u201A',  // single low-9 quotation mark
            '\u201C',  // left double quotation mark
            '\u201D',  // right double quotation mark
            '\u201E',  // double low-9 quotation mark
            '\u2039',  // single left-pointing angle quotation mark
            '\u203A'); // single right-pointing angle quotation mark

    /** First code point of the contiguous separator range (U+3000, ideographic space). */
    private static final int JAPANESE_RANGE_START = 0x3000;

    /** Last code point (inclusive) of the contiguous separator range (U+3030, wavy dash). */
    private static final int JAPANESE_RANGE_END = 0x3030;

    /** Additional separator code points that lie outside the contiguous range above. */
    private static final Set<Integer> JAPANESE_ALT_PUNCTUATION_MARKS = Set.of(
            0xFF08,  // fullwidth left parenthesis
            0xFF09,  // fullwidth right parenthesis
            0xFF0E,  // fullwidth full stop
            0xFF3B,  // fullwidth left square bracket
            0xFF3D,  // fullwidth right square bracket
            0xFF5B,  // fullwidth left curly bracket
            0xFF5D,  // fullwidth right curly bracket
            0xFF5F,  // fullwidth left white parenthesis
            0xFF60,  // fullwidth right white parenthesis
            0x309B,  // katakana-hiragana voiced sound mark
            0x309C,  // katakana-hiragana semi-voiced sound mark
            0xFF1D); // fullwidth equals sign

    /**
     * Tells whether {@code c} separates words.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} is whitespace, matches {@code \p{Punct}}, is one of the
     *         known quotation marks, or is a Japanese separator; {@code false} otherwise
     */
    public static boolean isSeparator(char c) {

        return Character.isWhitespace(c)
                || PUNCTUATION_PATTERN.matcher(String.valueOf(c)).matches()
                || QUOTES.contains(c)
                || isJapaneseSeparator(c);
    }


    /**
     * Tells whether {@code c} is one of the Japanese separator characters.
     *
     * @param c the character to classify
     * @return {@code true} if the code of {@code c} lies in U+3000..U+3030 (inclusive) or is one
     *         of the individually listed punctuation marks; {@code false} otherwise
     */
    public static boolean isJapaneseSeparator(char c) {

        int codePoint = c;
        boolean inContiguousRange = codePoint >= JAPANESE_RANGE_START && codePoint <= JAPANESE_RANGE_END;
        return inContiguousRange || JAPANESE_ALT_PUNCTUATION_MARKS.contains(codePoint);
    }

}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user