Merge branch 'DM-307' into 'master'

DM-307: Combine all values of a section before splitting them into sentences for DocuMine

See merge request redactmanager/redaction-report-service!5
This commit is contained in:
Dominique Eifländer 2023-07-11 12:52:09 +02:00
commit e50cabbca1

View File

@ -363,13 +363,13 @@ public class RSSPoc2Service {
}
private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue){
private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue) {
String transformation = String.format("Combine paragraphs of '%s' with seperator '%s' in first Section found", type, seperator);
var entries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).toList();
if(entries.isEmpty()){
if (entries.isEmpty()) {
return SCMComponent.builder().originalValue(elseValue).transformation(transformation).build();
}
@ -561,22 +561,28 @@ public class RSSPoc2Service {
String transformation = String.format("Values of type '%s' as sentences", type);
List<SCMComponent> sentences = new ArrayList<>();
var typeStringsEntries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).collect(Collectors.toList());
if (typeStringsEntries.isEmpty()) {
Map<Integer, List<RedactionLogEntry>> entriesPerSection = new HashMap<>();
redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).forEach(e -> {
entriesPerSection.computeIfAbsent(e.getSectionNumber(), (x) -> new ArrayList<>()).add(e);
});
if (entriesPerSection.isEmpty()) {
return sentences;
}
for (RedactionLogEntry typeStringEntry : typeStringsEntries) {
for (Map.Entry<Integer, List<RedactionLogEntry>> entriesInSection : entriesPerSection.entrySet()) {
String combinedString = entriesInSection.getValue().stream().map(RedactionLogEntry::getValue).collect(Collectors.joining(" ")).trim();
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
iterator.setText(typeStringEntry.getValue());
iterator.setText(combinedString);
int start = iterator.first();
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
sentences.add(SCMComponent.builder()
.originalValue(typeStringEntry.getValue().substring(start, end).replaceAll("\\n", "").trim())
.scmAnnotations(List.of(toScmAnnotations(typeStringEntry)))
.originalValue(combinedString.substring(start, end).replaceAll("\\n", "").trim())
.scmAnnotations(entriesInSection.getValue().stream().map(this::toScmAnnotations).toList())
.transformation(transformation)
.build());