Pull request #419: RED-4548: Create SIMPLIFIED_TEXT.json from TEXT.json (for NER-service)

Merge in RED/redaction-service from RED-4548 to master

* commit 'df80ff68cd21626679c107b056b275b67ce6bf74':
  RED-4548: Create SIMPLIFIED_TEXT.json from TEXT.json (for NER-service)
This commit is contained in:
Philipp Schramm 2022-07-12 16:41:06 +02:00
commit bfadac7a3f
4 changed files with 93 additions and 13 deletions

View File

@ -12,7 +12,7 @@
<artifactId>redaction-service-api-v1</artifactId>
<properties>
<persistence-service.version>1.238.0</persistence-service.version>
<persistence-service.version>1.240.0</persistence-service.version>
</properties>
<dependencies>

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Data holder for a single section of a document: the section's number and its text.
 *
 * <p>Getters/setters, equals/hashCode/toString, a builder and both the no-args and
 * all-args constructors are generated by the Lombok annotations below.
 * {@code @CompiledJson} opts the class into DSL-JSON's generated JSON conversion.
 */
@Data
@Builder
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class SimplifiedSectionText {
// Number of the section this text belongs to.
private int sectionNumber;
// Text content of the section.
private String text;
}

View File

@ -0,0 +1,23 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@CompiledJson
@NoArgsConstructor
@AllArgsConstructor
public class SimplifiedText {
private int numberOfPages;
private List<SimplifiedSectionText> simplifiedSectionTexts = new ArrayList<>();
}

View File

@ -1,5 +1,21 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService.IMPORTED_REDACTION_TYPE;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualForceRedaction;
@ -8,15 +24,31 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.entity
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.legalbasis.LegalBasis;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionLogLegalBasis;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.SimplifiedSectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.SimplifiedText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.segmentation.ImageService;
@ -28,15 +60,6 @@ import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.iqser.red.service.redaction.v1.server.redaction.service.ImportedRedactionService.IMPORTED_REDACTION_TYPE;
@Slf4j
@Service
@ -93,8 +116,9 @@ public class AnalyzeService {
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas())));
log.info("Store text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
log.info("Store text, simplified text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SIMPLIFIED_TEXT, convert(text));
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc.getSectionGrid());
return AnalyzeResult.builder()
@ -312,4 +336,17 @@ public class AnalyzeService {
}
}
/**
 * Builds the simplified form of the given text: the page count plus, for every
 * non-null section, its section number and text.
 *
 * @param text the full text model; may be {@code null}
 * @return a new {@link SimplifiedText}; it has zero pages and no sections when
 *         {@code text} is {@code null}, and no sections when the text carries no section list
 */
private SimplifiedText convert(Text text) {

    List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
    int numberOfPages = 0;

    if (text != null) {
        numberOfPages = text.getNumberOfPages();
        // A missing section list is treated like an empty one, in line with the
        // null-tolerance already applied to the text itself and to single entries.
        if (text.getSectionTexts() != null) {
            sectionTexts = text.getSectionTexts()
                    .stream()
                    .filter(Objects::nonNull)
                    .map(st -> new SimplifiedSectionText(st.getSectionNumber(), st.getText()))
                    .collect(Collectors.toList());
        }
    }

    return SimplifiedText.builder().numberOfPages(numberOfPages).simplifiedSectionTexts(sectionTexts).build();
}
}