From ca8b1b331841b913149762ee2c8454a9afd9f20c Mon Sep 17 00:00:00 2001 From: deiflaender Date: Tue, 11 Jul 2023 12:50:02 +0200 Subject: [PATCH] DM-307: Combine all values of section before splitting into sentences for DocuMine --- .../v1/server/service/RSSPoc2Service.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java b/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java index c6b9212..0fdb559 100644 --- a/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java +++ b/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java @@ -363,13 +363,13 @@ public class RSSPoc2Service { } - private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue){ + private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue) { String transformation = String.format("Combine paragraphs of '%s' with seperator '%s' in first Section found", type, seperator); var entries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).toList(); - if(entries.isEmpty()){ + if (entries.isEmpty()) { return SCMComponent.builder().originalValue(elseValue).transformation(transformation).build(); } @@ -561,22 +561,28 @@ public class RSSPoc2Service { String transformation = String.format("Values of type '%s' as sentences", type); List sentences = new ArrayList<>(); - var typeStringsEntries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).collect(Collectors.toList()); - if (typeStringsEntries.isEmpty()) { + Map> entriesPerSection = new HashMap<>(); + redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).forEach(e -> { + entriesPerSection.computeIfAbsent(e.getSectionNumber(), (x) -> new ArrayList<>()).add(e); + }); + + if (entriesPerSection.isEmpty()) { return sentences; } - for (RedactionLogEntry typeStringEntry : typeStringsEntries) { + for (Map.Entry> entriesInSection : entriesPerSection.entrySet()) { + + String combinedString = entriesInSection.getValue().stream().map(RedactionLogEntry::getValue).collect(Collectors.joining(" ")).trim(); BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); - iterator.setText(typeStringEntry.getValue()); + iterator.setText(combinedString); int start = iterator.first(); for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { sentences.add(SCMComponent.builder() - .originalValue(typeStringEntry.getValue().substring(start, end).replaceAll("\\n", "").trim()) - .scmAnnotations(List.of(toScmAnnotations(typeStringEntry))) + .originalValue(combinedString.substring(start, end).replaceAll("\\n", "").trim()) + .scmAnnotations(entriesInSection.getValue().stream().map(this::toScmAnnotations).toList()) .transformation(transformation) .build());