RED-5664: Enable redaction of words that start or end with a separator, needed for Japanese documents
This commit is contained in:
parent
0e925f2f24
commit
1fca62f578
@ -91,8 +91,8 @@ public class SearchableText {
|
||||
|
||||
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i).charAt(j, caseInsensitive) == '-') {
|
||||
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && SeparatorUtils.isSeparator(searchSpace.get(i)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && SeparatorUtils.isSeparator(searchSpace.get(i - 1)
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && (SeparatorUtils.isSeparator(searchSpace.get(i).charAt(j - 1, caseInsensitive)) || SeparatorUtils.isSeparator(
|
||||
searchSpace.get(i).charAt(j, caseInsensitive))) || j == 0 && i != 0 && SeparatorUtils.isSeparator(searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i).charAt(j, caseInsensitive) != ' ') {
|
||||
partMatch.add(searchSpace.get(i), searchSpace.get(i).textPositionAt(j));
|
||||
@ -105,9 +105,10 @@ public class SearchableText {
|
||||
crossSequenceParts.add(partMatch);
|
||||
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
|
||||
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i).charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1).charAt(0, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1).charAt(0, caseInsensitive) != ' ') {
|
||||
.length() - 1 && (SeparatorUtils.isSeparator(searchSpace.get(i).charAt(j + 1, caseInsensitive)) || SeparatorUtils.isSeparator(searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive))) || j == searchSpace.get(i).length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive)) || j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1).charAt(0, caseInsensitive) != ' ') {
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts, normalizedSearchString));
|
||||
}
|
||||
|
||||
@ -118,7 +119,7 @@ public class SearchableText {
|
||||
} else {
|
||||
counter = 0;
|
||||
if (!crossSequenceParts.isEmpty()) {
|
||||
j--;
|
||||
j = j - partMatch.length() - 1;
|
||||
}
|
||||
crossSequenceParts = new ArrayList<>();
|
||||
partMatch = new TextPositionSequence(searchSpace.get(i).getPage());
|
||||
|
||||
@ -71,8 +71,8 @@ public final class EntitySearchUtils {
|
||||
|
||||
private void validateAndAddEntity(Set<Entity> entities, FindEntityDetails findEntityDetails, String inputString, int startIndex, int stopIndex) {
|
||||
|
||||
if ((startIndex == 0 || SeparatorUtils.isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || SeparatorUtils.isSeparator(inputString.charAt(
|
||||
stopIndex)))) {
|
||||
if ((startIndex == 0 || SeparatorUtils.isSeparator(inputString.charAt(startIndex - 1)) || SeparatorUtils.isSeparator(inputString.charAt(startIndex))) && (stopIndex == inputString.length() || SeparatorUtils.isSeparator(
|
||||
inputString.charAt(stopIndex)) || SeparatorUtils.isSeparator(inputString.charAt(stopIndex - 1)))) {
|
||||
entities.add(new Entity(inputString.substring(startIndex, stopIndex),
|
||||
findEntityDetails.getType(),
|
||||
startIndex,
|
||||
@ -305,9 +305,12 @@ public final class EntitySearchUtils {
|
||||
.get(0)
|
||||
.getSequences()
|
||||
.get(0)
|
||||
.getMinXDirAdj() && image.getPosition().getX() + image.getPosition().getWidth() > entity.getPositionSequences().get(0).getSequences().get(0).getMaxXDirAdj() && image.getPosition()
|
||||
.getY() < entity.getPositionSequences().get(0).getSequences().get(0).getMinYDirAdj() && image.getPosition().getY() + image.getPosition()
|
||||
.getHeight() > entity.getPositionSequences().get(0).getSequences().get(0).getMaxYDirAdj();
|
||||
.getMinXDirAdj() && image.getPosition().getX() + image.getPosition().getWidth() > entity.getPositionSequences()
|
||||
.get(0)
|
||||
.getSequences()
|
||||
.get(0)
|
||||
.getMaxXDirAdj() && image.getPosition().getY() < entity.getPositionSequences().get(0).getSequences().get(0).getMinYDirAdj() && image.getPosition()
|
||||
.getY() + image.getPosition().getHeight() > entity.getPositionSequences().get(0).getSequences().get(0).getMaxYDirAdj();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -364,7 +364,7 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/table-with-merged-cells.pdf");
|
||||
AnalyzeRequest request = prepareStorage("files/new/APN3_Clean_6.1 (6.4.3.01-02)_Apple_211029.pdf");
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
@ -13,4 +13,20 @@ Dorn
|
||||
Prasher
|
||||
David
|
||||
annotation
|
||||
J.B. RASCLE
|
||||
J.B. RASCLE
|
||||
(果梗を除去したもの)
|
||||
(青森植)
|
||||
逸脱:
|
||||
(青森植)、ふじ(岩手植)、
|
||||
ひろさきふじ(青森植)、ふじ(岩手植)、つがる(長野植須坂)
|
||||
ひろさきふじ(青森植)、ふじ(岩手植)、つがる(長野植須坂) 学名
|
||||
りんご 品種 :ひろさきふじ(青森植)、ふじ(岩手植)、つがる(長野植須坂) 学名
|
||||
要約
|
||||
:準拠
|
||||
作物残留試験において、
|
||||
日間保存した。
|
||||
-20℃
|
||||
青森植
|
||||
サンプル量
|
||||
供試試料 (無処理 区)
|
||||
材料
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user