From 00b0cb160342f1857ac0e523f994918057d5fc6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?= Date: Mon, 8 Feb 2021 13:08:10 +0100 Subject: [PATCH] RED-1039: Fixed finding textpositions, RED-1042: Fixed get rectangles per line --- .../redaction/model/SearchableText.java | 41 +++++++++++++------ .../service/RedactionLogCreatorService.java | 2 +- .../v1/server/RedactionIntegrationTest.java | 10 ++++- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java index 20c85f98..dfa500ea 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/SearchableText.java @@ -32,6 +32,7 @@ public class SearchableText { } + @SuppressWarnings("checkstyle:ModifiedControlVariable") public List getSequences(String searchString, boolean caseInsensitive, List sequencesSubList) { @@ -66,9 +67,12 @@ public class SearchableText { for (int j = 0; j < searchSpace.get(i).length(); j++) { if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1) - .charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i) - .charAt(j, caseInsensitive) == ' ' && searchSpace.get(i).charAt(j - 1, caseInsensitive) == ' ') { - if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) { + .charAt(searchSpace.get(i - 1) + .length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i) + .charAt(j, caseInsensitive) == ' ' && searchSpace.get(i) + .charAt(j - 1, caseInsensitive) == ' ') { + if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions() + .isEmpty()) { crossSequenceParts.add(partMatch); } continue; @@ -80,8 +84,8 @@ public class SearchableText { counter++; } - if (searchSpace.get(i) - .charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i) + if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace + .get(i) .charAt(j, caseInsensitive) == '-') { if (counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(searchSpace.get(i) @@ -100,14 +104,15 @@ public class SearchableText { if (counter == searchString.length()) { crossSequenceParts.add(partMatch); - if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i) - .length() - 1 && isSeparator(searchSpace.get(i) + if (i == searchSpace.size() - 1 && j == searchSpace.get(i) + .length() - 1 || j != searchSpace.get(i).length() - 1 && isSeparator(searchSpace.get(i) .charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i) .length() - 1 && isSeparator(searchSpace.get(i + 1) - .charAt(0, caseInsensitive)) || j == searchSpace.get(i).length() - 1 && searchSpace.get(i) + .charAt(0, caseInsensitive)) || j == searchSpace.get(i) + .length() - 1 && searchSpace.get(i) .charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1) .charAt(0, caseInsensitive) != ' ') { - finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts)); + finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts, normalizedSearchString)); } counter = 0; @@ -130,15 +135,21 @@ public class SearchableText { } return finalMatches; - } - private List buildEntityPositionSequence(List crossSequenceParts) { + private List buildEntityPositionSequence(List crossSequenceParts, + String searchString) { + + List result = new ArrayList<>(); + String asString = buildString(crossSequenceParts); + if (!asString.equalsIgnoreCase(searchString)) { + return result; + } String plainId = IdBuilder.buildId(crossSequenceParts); String id = plainId; - List result = new ArrayList<>(); + int currentPage = -1; int idDiffentPageSuffix = 1; EntityPositionSequence entityPositionSequence = new EntityPositionSequence(id); @@ -173,6 +184,12 @@ public class SearchableText { @Override public String toString() { + return buildString(sequences); + } + + + public String buildString(List sequences) { + StringBuilder sb = new StringBuilder(); TextPositionSequence previous = null; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 01eea643..19e61ab6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -194,7 +194,7 @@ public class RedactionLogCreatorService { startIndex = i; } } - if (startIndex != textPositions.size() - 1) { + if (startIndex != textPositions.size()) { rectangles.add(new TextPositionSequence(textPositions.subList(startIndex, textPositions.size()), page).getRectangle()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index eb34a1dd..75a60c07 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -418,6 +418,7 @@ public class RedactionIntegrationTest { } + private List getPathsRecursively(File path) { List result = new ArrayList<>(); @@ -439,9 +440,16 @@ public class RedactionIntegrationTest { @Test public void redactionTest() throws IOException { +// 49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf +// 182 Fludioxonil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf +// 38 A14325E - EU AIR3 - MCP Section 10 - Ecotoxicological studies on the plant protection product.pdf +// 91 Trinexapac-ethyl_RAR_01_Volume_1_2018-02-23.pdf +// 95 Trinexapac-ethyl_RAR_08_Volume_3CA_B-6_2018-01-10.pdf + + System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/49 Cyprodinil - EU AIR3 - MCA Section 8 Supplement - Ecotoxicological studies on the active substance.pdf"); AnalyzeRequest request = AnalyzeRequest.builder() .ruleSetId(TEST_RULESET_ID)