Pull request #120: RED-1039: Fixed finding text positions; RED-1042: Fixed getting rectangles per line
Merge in RED/redaction-service from RED-1039 to master
* commit '00b0cb160342f1857ac0e523f994918057d5fc6b':
  RED-1039: Fixed finding text positions; RED-1042: Fixed getting rectangles per line
This commit is contained in:
commit
6d9ed080ce
@ -32,6 +32,7 @@ public class SearchableText {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("checkstyle:ModifiedControlVariable")
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive,
|
||||
List<TextPositionSequence> sequencesSubList) {
|
||||
@ -66,9 +67,12 @@ public class SearchableText {
|
||||
for (int j = 0; j < searchSpace.get(i).length(); j++) {
|
||||
|
||||
if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i).charAt(j - 1, caseInsensitive) == ' ') {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i)
|
||||
.charAt(j - 1, caseInsensitive) == ' ') {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions()
|
||||
.isEmpty()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
continue;
|
||||
@ -80,8 +84,8 @@ public class SearchableText {
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i)
|
||||
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace
|
||||
.get(i)
|
||||
.charAt(j, caseInsensitive) == '-') {
|
||||
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(searchSpace.get(i)
|
||||
@ -100,14 +104,15 @@ public class SearchableText {
|
||||
if (counter == searchString.length()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
|
||||
.length() - 1 && isSeparator(searchSpace.get(i)
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i)
|
||||
.length() - 1 || j != searchSpace.get(i).length() - 1 && isSeparator(searchSpace.get(i)
|
||||
.charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && isSeparator(searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive)) || j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
|
||||
.charAt(0, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive) != ' ') {
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts));
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts, normalizedSearchString));
|
||||
}
|
||||
|
||||
counter = 0;
|
||||
@ -130,15 +135,21 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
return finalMatches;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts) {
|
||||
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts,
|
||||
String searchString) {
|
||||
|
||||
List<EntityPositionSequence> result = new ArrayList<>();
|
||||
String asString = buildString(crossSequenceParts);
|
||||
if (!asString.equalsIgnoreCase(searchString)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
String plainId = IdBuilder.buildId(crossSequenceParts);
|
||||
String id = plainId;
|
||||
List<EntityPositionSequence> result = new ArrayList<>();
|
||||
|
||||
int currentPage = -1;
|
||||
int idDiffentPageSuffix = 1;
|
||||
EntityPositionSequence entityPositionSequence = new EntityPositionSequence(id);
|
||||
@ -173,6 +184,12 @@ public class SearchableText {
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return buildString(sequences);
|
||||
}
|
||||
|
||||
|
||||
public String buildString(List<TextPositionSequence> sequences) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
|
||||
@ -194,7 +194,7 @@ public class RedactionLogCreatorService {
|
||||
startIndex = i;
|
||||
}
|
||||
}
|
||||
if (startIndex != textPositions.size() - 1) {
|
||||
if (startIndex != textPositions.size()) {
|
||||
rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, textPositions.size()), page).getRectangle());
|
||||
}
|
||||
}
|
||||
|
||||
@ -418,6 +418,7 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private List<File> getPathsRecursively(File path) {
|
||||
|
||||
List<File> result = new ArrayList<>();
|
||||
@ -439,9 +440,16 @@ public class RedactionIntegrationTest {
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
// 49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf
|
||||
// 182 Fludioxonil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf
|
||||
// 38 A14325E - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf
|
||||
// 91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf
|
||||
// 95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf
|
||||
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf");
|
||||
|
||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||
.ruleSetId(TEST_RULESET_ID)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user