RED-882: Added textBefore and textAfter to redaction log
This commit is contained in:
parent
add196f913
commit
caf6277de9
@ -34,4 +34,7 @@ public class RedactionLogEntry {
|
||||
private ManualRedactionType manualRedactionType;
|
||||
private boolean isDictionaryEntry;
|
||||
|
||||
private String textBefore;
|
||||
private String textAfter;
|
||||
|
||||
}
|
||||
|
||||
@ -24,7 +24,6 @@ import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationSer
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.AnnotationHighlightService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfFlattenService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -37,7 +36,6 @@ public class RedactionController implements RedactionResource {
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final AnnotationHighlightService annotationHighlightService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final PdfFlattenService pdfFlattenService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
@ -52,12 +50,6 @@ public class RedactionController implements RedactionResource {
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction(), redactionRequest
|
||||
.getManualRedactions());
|
||||
|
||||
if (redactionRequest.isFlatRedaction()) {
|
||||
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
||||
return convert(flatDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||
}
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages()
|
||||
.size(), classifiedDoc.getRedactionLogEntities(), classifiedDoc.getSectionGrid(), classifiedDoc.getDictionaryVersion(), classifiedDoc.getRulesVersion());
|
||||
|
||||
|
||||
@ -35,8 +35,11 @@ public class Entity {
|
||||
|
||||
private boolean isDictionaryEntry;
|
||||
|
||||
private String textBefore;
|
||||
private String textAfter;
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry) {
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -48,6 +51,8 @@ public class Entity {
|
||||
this.sectionNumber = sectionNumber;
|
||||
this.legalBasis = legalBasis;
|
||||
this.isDictionaryEntry = isDictionaryEntry;
|
||||
this.textBefore = textBefore;
|
||||
this.textAfter = textAfter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -41,6 +41,7 @@ public class EntityRedactionService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc, ManualRedactions manualRedactions) {
|
||||
@ -83,7 +84,7 @@ public class EntityRedactionService {
|
||||
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
|
||||
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
|
||||
.getLegalBasis(), entity.isDictionaryEntry()));
|
||||
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -94,9 +95,8 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions,
|
||||
Dictionary dictionary, boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
private Set<Entity> findEntities(Document classifiedDoc, ManualRedactions manualRedactions, Dictionary dictionary,
|
||||
boolean local, Map<Integer, Set<Entity>> hintsPerSectionNumber) {
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
int sectionNumber = 1;
|
||||
@ -113,6 +113,7 @@ public class EntityRedactionService {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
Map<String, CellValue> tabularData = new HashMap<>();
|
||||
int start = 0;
|
||||
List<Integer> cellStarts = new ArrayList<>();
|
||||
for (Cell cell : row) {
|
||||
if (!singleCellTable && cell.isHeaderCell() || CollectionUtils.isEmpty(cell.getTextBlocks())) {
|
||||
continue;
|
||||
@ -128,12 +129,17 @@ public class EntityRedactionService {
|
||||
.replaceAll("-", "");
|
||||
tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
|
||||
});
|
||||
start = start + cell.toString().length() + 1;
|
||||
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
// TODO avoid cell overlap merging.
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
cellStarts.add(cellStart);
|
||||
start = start + cell.toString().trim().length() + 1;
|
||||
}
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber, dictionary
|
||||
.getDictionaryModels(), local);
|
||||
surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts);
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
@ -155,6 +161,7 @@ public class EntityRedactionService {
|
||||
|
||||
addSectionToManualRedactions(paragraph.getTextBlocks(), manualRedactions, paragraph.getHeadline(), sectionNumber);
|
||||
Set<Entity> entities = findEntities(searchableText, paragraph.getHeadline(), sectionNumber, dictionary.getDictionaryModels(), local);
|
||||
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(local)
|
||||
@ -176,20 +183,20 @@ public class EntityRedactionService {
|
||||
documentEntities.addAll(clearAndFindPositions(analysedRowSection.getEntities(), sectionSearchableTextPair.getSearchableText(), dictionary));
|
||||
|
||||
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
if (dictionary.isRecommendation(key)){
|
||||
if (dictionary.isRecommendation(key)) {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
if (!dictionary.containsValue(key, value)){
|
||||
if (!dictionary.containsValue(key, value)) {
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach( value -> {
|
||||
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
|
||||
if(dictionary.getLocalAccessMap().get(key) == null){
|
||||
if (dictionary.getLocalAccessMap().get(key) == null) {
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
|
||||
if(dictionary.getLocalAccessMap().get(key).getLocalEntries() == null){
|
||||
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
|
||||
@ -198,7 +205,6 @@ public class EntityRedactionService {
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
});
|
||||
|
||||
return documentEntities;
|
||||
@ -243,13 +249,14 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean local) {
|
||||
private Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
|
||||
boolean local) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
for (String value : values) {
|
||||
|
||||
if(value.trim().length() <= 2) {
|
||||
if (value.trim().length() <= 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,140 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class SurroundingWordsService {
|
||||
|
||||
private final RedactionServiceSettings redactionServiceSettings;
|
||||
|
||||
|
||||
public void addSurroundingText(Set<Entity> entities, SearchableText searchableText, Dictionary dictionary) {
|
||||
|
||||
if (entities.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
for (Entity entity : entities) {
|
||||
|
||||
if (dictionary.isHint(entity.getType())) {
|
||||
continue;
|
||||
}
|
||||
findSurroundingWords(entity, searchableText.toString(), entity.getStart(), entity.getEnd());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Could not get surrounding text!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addSurroundingText(Set<Entity> entities, SearchableText searchableText, Dictionary dictionary,
|
||||
List<Integer> cellstarts) {
|
||||
|
||||
if (entities.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
String searchableString = searchableText.toString();
|
||||
|
||||
if (cellstarts != null) {
|
||||
for (int i = 0; i < cellstarts.size(); i++) {
|
||||
|
||||
int startOffset = cellstarts.get(i);
|
||||
int endOffset = -1;
|
||||
|
||||
if (i + 1 < cellstarts.size()) {
|
||||
endOffset = cellstarts.get(i + 1);
|
||||
} else {
|
||||
endOffset = searchableString.length() - 1;
|
||||
}
|
||||
|
||||
String text = searchableString.substring(startOffset, endOffset);
|
||||
for (Entity entity : entities) {
|
||||
|
||||
if (dictionary.isHint(entity.getType())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (entity.getStart() >= startOffset && entity.getEnd() <= endOffset) {
|
||||
int entityStartOffset = entity.getStart() - startOffset;
|
||||
int entityEndOffset = entity.getEnd() - startOffset;
|
||||
findSurroundingWords(entity, text, entityStartOffset, entityEndOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Could not get surrounding text!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void findSurroundingWords(Entity entity, String text, int entityStartOffset, int entityEndOffset) {
|
||||
|
||||
int offsetBefore = entityStartOffset - redactionServiceSettings.getSurroundingWordsOffsetWindow() < 0 ? 0 : entityStartOffset - redactionServiceSettings
|
||||
.getSurroundingWordsOffsetWindow();
|
||||
String textBefore = text.substring(offsetBefore, entityStartOffset);
|
||||
if (!textBefore.isBlank()) {
|
||||
String[] wordsBefore = textBefore.split(" ");
|
||||
int numberOfWordsBefore = wordsBefore.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings
|
||||
.getNumberOfSurroundingWords() : wordsBefore.length;
|
||||
if (wordsBefore.length > 0) {
|
||||
entity.setTextBefore(concatWordsBefore(wordsBefore, numberOfWordsBefore));
|
||||
}
|
||||
}
|
||||
|
||||
int endOffset = entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow() > text.length() ? text
|
||||
.length() : entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow();
|
||||
String textAfter = text.substring(entityEndOffset, endOffset);
|
||||
if (!textAfter.isBlank()) {
|
||||
String[] wordsAfter = textAfter.split(" ");
|
||||
int numberOfWordsAfter = wordsAfter.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings
|
||||
.getNumberOfSurroundingWords() : wordsAfter.length;
|
||||
if (wordsAfter.length > 0) {
|
||||
entity.setTextAfter(concatWordsAfter(wordsAfter, numberOfWordsAfter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private String concatWordsBefore(String[] words, int number) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
int startNumber = words.length > number ? words.length - number : 0;
|
||||
|
||||
for (int i = startNumber; i < words.length; i++) {
|
||||
sb.append(words[i]).append(" ");
|
||||
}
|
||||
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
|
||||
private String concatWordsAfter(String[] words, int number) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < number; i++) {
|
||||
sb.append(words[i]).append(" ");
|
||||
}
|
||||
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
}
|
||||
@ -11,7 +11,7 @@ public class TextNormalizationUtilities {
|
||||
* @return Text without line-break hyphenation.
|
||||
*/
|
||||
public static String removeHyphenLineBreaks(String text) {
|
||||
return text.replaceAll("\\s(\\S+)[\\-\\u00AD]\\R|\n\r(.+ )", "\n$1$2");
|
||||
return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R|\n\r(.+ )", "$1$2");
|
||||
}
|
||||
|
||||
}
|
||||
@ -7,12 +7,9 @@ import lombok.Data;
|
||||
// Settings bound to configuration properties with the "redaction-service" prefix.
@Data
|
||||
@ConfigurationProperties("redaction-service")
|
||||
public class RedactionServiceSettings {
|
||||
|
||||
// Maximum number of words SurroundingWordsService keeps before and after an entity.
private int numberOfSurroundingWords = 3;
|
||||
|
||||
/**
|
||||
* Tenant used in single tenant mode.
|
||||
*/
|
||||
private String defaultTenant = "iqser-id";
|
||||
|
||||
// DPI at which PdfFlattenService renders each page to an image.
private int flattenImageDpi = 100;
|
||||
// Number of characters on each side of an entity that SurroundingWordsService searches for surrounding words.
private int surroundingWordsOffsetWindow = 100;
|
||||
|
||||
}
|
||||
@ -42,10 +42,13 @@ public class Cell extends Rectangle {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
|
||||
Iterator<TextBlock> itty = textBlocks.iterator();
|
||||
TextPositionSequence previous = null;
|
||||
while (itty.hasNext()) {
|
||||
|
||||
TextBlock textBlock = itty.next();
|
||||
TextPositionSequence previous = null;
|
||||
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
@ -57,9 +60,7 @@ public class Cell extends Rectangle {
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
if (itty.hasNext()) {
|
||||
sb.append(' ');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
@ -67,4 +68,8 @@ public class Cell extends Rectangle {
|
||||
.replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -276,6 +276,8 @@ public class AnnotationHighlightService {
|
||||
.sectionNumber(entity.getSectionNumber())
|
||||
.matchedRule(entity.getMatchedRule())
|
||||
.isDictionaryEntry(entity.isDictionaryEntry())
|
||||
.textAfter(entity.getTextAfter())
|
||||
.textBefore(entity.getTextBefore())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -1,68 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PdfFlattenService {
|
||||
|
||||
private final RedactionServiceSettings settings;
|
||||
|
||||
public PDDocument flattenPDF(PDDocument sourceDoc) throws IOException {
|
||||
|
||||
PDDocument destDoc = new PDDocument();
|
||||
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(sourceDoc);
|
||||
|
||||
final int pageCount = sourceDoc.getDocumentCatalog().getPages().getCount();
|
||||
|
||||
log.info(pageCount + " page" + (pageCount == 1 ? "" : "s") + " to flatten.");
|
||||
|
||||
for (int i = 0; i < pageCount; i += 1) {
|
||||
|
||||
log.info("Flattening page " + (i + 1) + " of " + pageCount + "...");
|
||||
|
||||
BufferedImage img = pdfRenderer.renderImageWithDPI(i, settings.getFlattenImageDpi(), ImageType.RGB);
|
||||
|
||||
log.info("Image rendered in memory (" + img.getWidth() + "x" + img.getHeight() + " " + settings.getFlattenImageDpi() + "DPI). Adding to PDF...");
|
||||
|
||||
PDPage imagePage = new PDPage(new PDRectangle(img.getWidth(), img.getHeight()));
|
||||
destDoc.addPage(imagePage);
|
||||
|
||||
PDImageXObject imgObj = LosslessFactory.createFromImage(destDoc, img);
|
||||
|
||||
PDPageContentStream imagePageContentStream = new PDPageContentStream(destDoc, imagePage);
|
||||
imagePageContentStream.drawImage(imgObj, 0, 0);
|
||||
|
||||
log.info("Image added successfully.");
|
||||
|
||||
imagePageContentStream.close();
|
||||
|
||||
img.flush();
|
||||
}
|
||||
|
||||
log.info("New flattened PDF created in memory.");
|
||||
|
||||
sourceDoc.close();
|
||||
|
||||
return destDoc;
|
||||
}
|
||||
|
||||
}
|
||||
@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
@ -391,11 +391,11 @@ public class RedactionIntegrationTest {
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
if(entry.isDictionaryEntry()){
|
||||
System.out.println(entry.getValue());
|
||||
}
|
||||
});
|
||||
// result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
// if(!entry.isHint()){
|
||||
// System.out.println(entry.getPositions().get(0).getPage() +":"+ entry.getTextBefore() +"--->"+ entry.getValue() + "--->" + entry.getTextAfter());
|
||||
// }
|
||||
// });
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class TextNormalizationUtilities {
|
||||
|
||||
/**
|
||||
* Revert hyphenation due to line breaks.
|
||||
* @param text Text to be processed.
|
||||
* @return Text without line-break hyphenation.
|
||||
*/
|
||||
public static String removeHyphenLineBreaks(String text) {
|
||||
return text.replaceAll("\\s(\\S+)[\\-\\u00AD]\\R|\n\r(.+ )", "\n$1$2");
|
||||
}
|
||||
|
||||
}
|
||||
@ -10,11 +10,11 @@ public class TextNormalizationUtilitiesTest {
|
||||
|
||||
String test = "Without these peo-\nple, this conference would not happen";
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test))
|
||||
.contains("\npeople");
|
||||
.contains("people");
|
||||
|
||||
test = "Die\t\nFreiwillige\t Versicherung\t endet\t zudem\t für\t den\t ein\u00AD\nzelnen\tVersicherten\tmit\tder\tAufhebung\tdes\tVertra-\nges,\t seiner\t Unterstellung\t unter\t die\t obligatorische\t\nVersicherung\t oder\t seinem\t Ausschluss.";
|
||||
Assertions.assertThat(TextNormalizationUtilities.removeHyphenLineBreaks(test))
|
||||
.contains("\neinzelnen", "\nVertrages");
|
||||
.contains("einzelnen", "Vertrages");
|
||||
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user