Merge branch 'DM-307' into 'master'
DM-307: Combine all values of section before splitting into sentences for DocuMine. See merge request redactmanager/redaction-report-service!5
This commit is contained in:
commit
e50cabbca1
@ -363,13 +363,13 @@ public class RSSPoc2Service {
|
||||
}
|
||||
|
||||
|
||||
private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue){
|
||||
private SCMComponent combineValuesOfFirstFoundSection(RedactionLog redactionLog, String type, String seperator, String elseValue) {
|
||||
|
||||
String transformation = String.format("Combine paragraphs of '%s' with seperator '%s' in first Section found", type, seperator);
|
||||
|
||||
var entries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).toList();
|
||||
|
||||
if(entries.isEmpty()){
|
||||
if (entries.isEmpty()) {
|
||||
return SCMComponent.builder().originalValue(elseValue).transformation(transformation).build();
|
||||
}
|
||||
|
||||
@ -561,22 +561,28 @@ public class RSSPoc2Service {
|
||||
String transformation = String.format("Values of type '%s' as sentences", type);
|
||||
|
||||
List<SCMComponent> sentences = new ArrayList<>();
|
||||
var typeStringsEntries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).collect(Collectors.toList());
|
||||
|
||||
if (typeStringsEntries.isEmpty()) {
|
||||
Map<Integer, List<RedactionLogEntry>> entriesPerSection = new HashMap<>();
|
||||
redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).forEach(e -> {
|
||||
entriesPerSection.computeIfAbsent(e.getSectionNumber(), (x) -> new ArrayList<>()).add(e);
|
||||
});
|
||||
|
||||
if (entriesPerSection.isEmpty()) {
|
||||
return sentences;
|
||||
}
|
||||
|
||||
for (RedactionLogEntry typeStringEntry : typeStringsEntries) {
|
||||
for (Map.Entry<Integer, List<RedactionLogEntry>> entriesInSection : entriesPerSection.entrySet()) {
|
||||
|
||||
String combinedString = entriesInSection.getValue().stream().map(RedactionLogEntry::getValue).collect(Collectors.joining(" ")).trim();
|
||||
|
||||
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
|
||||
iterator.setText(typeStringEntry.getValue());
|
||||
iterator.setText(combinedString);
|
||||
int start = iterator.first();
|
||||
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
|
||||
|
||||
sentences.add(SCMComponent.builder()
|
||||
.originalValue(typeStringEntry.getValue().substring(start, end).replaceAll("\\n", "").trim())
|
||||
.scmAnnotations(List.of(toScmAnnotations(typeStringEntry)))
|
||||
.originalValue(combinedString.substring(start, end).replaceAll("\\n", "").trim())
|
||||
.scmAnnotations(entriesInSection.getValue().stream().map(this::toScmAnnotations).toList())
|
||||
.transformation(transformation)
|
||||
.build());
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user