Pull request #516: RED-6204

Merge in RED/redaction-service from RED-6204 to master

* commit '6e73cac99c99fc333b9ec29b4a16aad48ffba149':
  RED-6204: Added annotation to suppress a false-positive warning in SonarQube
  RED-6204: Corrected typo in comment
  RED-6204: Simplified conversion of File to Path
  RED-6204: Corrected temp file deletion in PdfSegmentationService.
  RED-6204: Upgraded to newest platform-dependency and migrated tests to Junit5
This commit is contained in:
Viktor Seifert 2023-02-24 17:29:43 +01:00
commit 8ce093090e
15 changed files with 188 additions and 162 deletions

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>1.13.0</version>
<version>1.17.0</version>
<relativePath/>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -53,15 +53,14 @@ public class PdfSegmentationService {
public Document parseDocument(String dossierId, String fileId, InputStream documentInputStream, Map<Integer, List<PdfImage>> pdfImages) throws IOException {
PDDocument pdDocument = null;
File tempFile = null;
try {
//create tempFile
File tempFile = FileUtils.createTempFile("document", ".pdf");
Map<Integer, List<PdfTableCell>> pdfTableCells = new HashMap<>();
if (redactionServiceSettings.isCvTableParsingEnabled()) {
pdfTableCells = tableService.convertTables(dossierId, fileId);
}
tempFile = FileUtils.createTempFile("document", ".pdf");
try (var fos = new FileOutputStream(tempFile)) {
IOUtils.copy(documentInputStream, fos);
@ -74,44 +73,7 @@ public class PdfSegmentationService {
long pageCount = pdDocument.getNumberOfPages();
for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
PDFLinesTextStripper stripper = new PDFLinesTextStripper();
PDPage pdPage = pdDocument.getPage(pageNumber - 1);
stripper.setPageNumber(pageNumber);
stripper.setStartPage(pageNumber);
stripper.setEndPage(pageNumber);
stripper.setPdpage(pdPage);
stripper.getText(pdDocument);
PDRectangle pdr = pdPage.getMediaBox();
int rotation = pdPage.getRotation();
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);
PDRectangle cropbox = pdPage.getCropBox();
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber),
stripper.getRulings(),
stripper.getMinCharWidth(),
stripper.getMaxCharHeight());
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
page.setRotation(rotation);
page.setLandscape(isLandscape);
page.setPageNumber(pageNumber);
page.setPageWidth(cropbox.getWidth());
page.setPageHeight(cropbox.getHeight());
// If images is ocr needs to be calculated before textBlocks are moved into tables, otherwise findOcr algorithm needs to be adopted.
if (pdfImages != null && pdfImages.containsKey(pageNumber)) {
page.setImages(pdfImages.get(pageNumber));
imageService.findOcr(page);
}
tableExtractionService.extractTables(cleanRulings, page);
buildPageStatistics(page);
increaseDocumentStatistics(page, document);
pages.add(page);
processPage(pdfImages, pdDocument, pdfTableCells, document, pages, pageNumber);
}
document.setPages(pages);
@ -120,22 +82,65 @@ public class PdfSegmentationService {
sectionsBuilderService.buildSections(document);
sectionsBuilderService.addImagesToSections(document);
IOUtils.close(pdDocument);
if (!tempFile.delete()) {
log.warn("Could not delete tmp file");
}
return document;
}
} finally {
if (pdDocument != null) {
pdDocument.close();
}
FileUtils.deleteFile(tempFile);
}
}
/**
 * Segments one page of the PDF and appends the resulting {@link Page} to {@code pages}.
 * <p>
 * The page text is stripped, the detected rulings are cleaned, the text is blockified,
 * page geometry is recorded, images are attached, tables are extracted and finally the
 * page and document statistics are updated.
 *
 * @param pdfImages     Images keyed by page number. May be {@code null}.
 * @param pdDocument    The opened PDF document the page is read from.
 * @param pdfTableCells Table cells keyed by page number; the entry for {@code pageNumber} is handed to the ruling cleaning.
 * @param document      The document whose statistics are increased by this page.
 * @param pages         The list the processed page is appended to.
 * @param pageNumber    The 1-based number of the page to process.
 * @throws IOException If reading the page content fails.
 */
private void processPage(Map<Integer, List<PdfImage>> pdfImages,
        PDDocument pdDocument,
        Map<Integer, List<PdfTableCell>> pdfTableCells,
        Document document,
        List<Page> pages,
        int pageNumber) throws IOException {

    // Restrict the stripper to exactly this page; the PDFBox page index is 0-based.
    PDFLinesTextStripper textStripper = new PDFLinesTextStripper();
    PDPage currentPage = pdDocument.getPage(pageNumber - 1);
    textStripper.setPageNumber(pageNumber);
    textStripper.setStartPage(pageNumber);
    textStripper.setEndPage(pageNumber);
    textStripper.setPdpage(currentPage);
    textStripper.getText(pdDocument);

    // A page counts as landscape if its media box is wider than high without a quarter turn,
    // or higher than wide with a quarter turn.
    PDRectangle mediaBox = currentPage.getMediaBox();
    int pageRotation = currentPage.getRotation();
    boolean widerThanHigh = mediaBox.getWidth() > mediaBox.getHeight();
    boolean higherThanWide = mediaBox.getHeight() > mediaBox.getWidth();
    boolean uprightOrFlipped = pageRotation == 0 || pageRotation == 180;
    boolean quarterTurned = pageRotation == 90 || pageRotation == 270;
    boolean landscape = widerThanHigh && uprightOrFlipped || higherThanWide && quarterTurned;

    PDRectangle cropBox = currentPage.getCropBox();

    CleanRulings rulings = rulingCleaningService.getCleanRulings(pdfTableCells.get(pageNumber),
            textStripper.getRulings(),
            textStripper.getMinCharWidth(),
            textStripper.getMaxCharHeight());

    Page segmentedPage = blockificationService.blockify(textStripper.getTextPositionSequences(), rulings.getHorizontal(), rulings.getVertical());
    segmentedPage.setRotation(pageRotation);
    segmentedPage.setLandscape(landscape);
    segmentedPage.setPageNumber(pageNumber);
    segmentedPage.setPageWidth(cropBox.getWidth());
    segmentedPage.setPageHeight(cropBox.getHeight());

    // OCR detection on images has to run before the text blocks are moved into tables;
    // otherwise the findOcr algorithm would need to be adapted.
    boolean pageHasImages = pdfImages != null && pdfImages.containsKey(pageNumber);
    if (pageHasImages) {
        segmentedPage.setImages(pdfImages.get(pageNumber));
        imageService.findOcr(segmentedPage);
    }

    tableExtractionService.extractTables(rulings, segmentedPage);
    buildPageStatistics(segmentedPage);
    increaseDocumentStatistics(segmentedPage, document);

    pages.add(segmentedPage);
}
private void increaseDocumentStatistics(Page page, Document document) {
if (!page.isLandscape()) {

View File

@ -20,19 +20,31 @@ public class FileUtils {
}
/**
 * Removes the given file from disk if it exists.
 * A failing deletion is not propagated to the caller; it is logged as a warning together with its cause.
 * Passing {@code null} is allowed and does nothing.
 *
 * @param file The file to remove. May be {@code null}.
 */
public void deleteFile(File file) {

    if (file == null) {
        // Nothing to clean up.
        return;
    }

    try {
        Files.deleteIfExists(file.toPath());
    } catch (IOException deletionFailure) {
        log.warn("Could not delete file!", deletionFailure);
    }
}
// We don't need to check the results of the permission setters below,
// since we're manipulating a file we created ourselves.
@SuppressWarnings("ResultOfMethodCallIgnored")
@SuppressWarnings({"ResultOfMethodCallIgnored", "squid:S899"})
private void setRWPermissionsOnlyForOwner(File tempFile) {
try {
// deny for all
tempFile.setReadable(false);
tempFile.setWritable(false);
tempFile.setExecutable(false);
// allow for owner
tempFile.setReadable(true, true);
tempFile.setWritable(true, true);
tempFile.setExecutable(false);
} catch (SecurityException ex) {
// This should never happen since we're creating a temp file ourselves.
log.warn("Caught an exception during temp file creation. This should not happend. Check the code.", ex);

View File

@ -1,16 +1,16 @@
package com.iqser.red.service.redaction.v1.server;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.runtime.KieContainer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
@ -21,16 +21,18 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.when;
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class DictionaryServiceTest {

View File

@ -20,10 +20,10 @@ import java.util.Set;
import java.util.stream.Collectors;
import org.assertj.core.api.Assertions;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
@ -41,7 +41,7 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -72,7 +72,7 @@ import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
import lombok.ToString;
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(HeadlinesGoldStandardIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class HeadlinesGoldStandardIntegrationTest {
@ -229,7 +229,7 @@ public class HeadlinesGoldStandardIntegrationTest {
}
@After
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
@ -238,7 +238,7 @@ public class HeadlinesGoldStandardIntegrationTest {
}
@Before
@BeforeEach
public void stubClients() {
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);

View File

@ -24,11 +24,11 @@ import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
@ -46,7 +46,7 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.core.type.TypeReference;
@ -92,7 +92,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTest {
@ -202,7 +202,7 @@ public class RedactionIntegrationTest {
}
@After
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
@ -211,7 +211,7 @@ public class RedactionIntegrationTest {
}
@Before
@BeforeEach
public void stubClients() {
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
@ -293,7 +293,7 @@ public class RedactionIntegrationTest {
@Test
@Ignore
@Disabled
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
@ -398,7 +398,7 @@ public class RedactionIntegrationTest {
@Test
@Ignore
@Disabled
@SneakyThrows
public void testIgnoreHint() {
@ -440,7 +440,7 @@ public class RedactionIntegrationTest {
@Test
@Ignore
@Disabled
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
long start = System.currentTimeMillis();
@ -1252,7 +1252,7 @@ public class RedactionIntegrationTest {
@Test
@Ignore
@Disabled
public void resizeRedactionTest() throws IOException {
String pdfFile = "files/Minimal Examples/Single Table.pdf";
@ -1369,7 +1369,7 @@ public class RedactionIntegrationTest {
@Test
@Ignore
@Disabled
public void testManualSurroundingText() throws IOException {
String pdfFile = "files/new/S4.pdf";

View File

@ -28,11 +28,12 @@ import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
@ -49,8 +50,7 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.annotation.IfProfileValue;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.DeserializationFeature;
@ -88,7 +88,7 @@ import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(RulesTest.RulesTestConfiguration.class)
public class RulesTest {
@ -247,7 +247,7 @@ public class RulesTest {
private LegalBasisClient legalBasisClient;
@Before
@BeforeEach
public void stubClients() {
objectMapper.registerModule(new JavaTimeModule());
@ -280,7 +280,7 @@ public class RulesTest {
}
@After
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
@ -294,7 +294,7 @@ public class RulesTest {
* If the RedactionLog already exists, it will be overwritten
* Test is ignored, because it's for manual tests.
*/
@Ignore
@Disabled
@Test
public void generateRedactionLogForOneFile() {
@ -336,7 +336,7 @@ public class RulesTest {
* Analyses all files and compares its RedactionLog with saved one from here: REDACTION_LOG_PATH.
* If RedactionLog Json for one file does not exist, whole test will fail.
*/
@IfProfileValue(name = "test-groups", value = "rules-test")
@EnabledIfSystemProperty(named = "test-groups", matches = "(.*)rules-test(.*)")
@Test
public void analyseAllFilesAndCompareRedactionLogs() {

View File

@ -4,7 +4,7 @@ import java.time.OffsetDateTime;
import java.util.List;
import java.util.Set;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;

View File

@ -1,28 +1,21 @@
package com.iqser.red.service.redaction.v1.server.realdata;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.client.*;
import com.iqser.red.service.redaction.v1.server.queue.RedactionMessageReceiver;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.when;
import io.micrometer.prometheus.PrometheusMeterRegistry;
import lombok.SneakyThrows;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
@ -35,18 +28,32 @@ import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.queue.RedactionMessageReceiver;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.when;
import io.micrometer.prometheus.PrometheusMeterRegistry;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(LiveDataIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class LiveDataIntegrationTest {
@ -108,7 +115,7 @@ public class LiveDataIntegrationTest {
@SneakyThrows
@Before
@BeforeEach
public void prepareTest() {
when(dictionaryClient.getVersion(anyString())).thenReturn(1L);

View File

@ -1,11 +1,11 @@
package com.iqser.red.service.redaction.v1.server.redaction.rulebuilder;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import org.junit.Test;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
public class RuleBuilderModelServiceTest {
@Test

View File

@ -5,7 +5,7 @@ import static org.assertj.core.api.Assertions.assertThat;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;

View File

@ -6,7 +6,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.junit.jupiter.api.Test;
public class RegExPatternTest {

View File

@ -1,7 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.junit.jupiter.api.Test;
public class TextNormalizationUtilitiesTest {

View File

@ -11,8 +11,8 @@ import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.kie.api.runtime.KieContainer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
@ -25,7 +25,7 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -49,7 +49,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@RunWith(SpringRunner.class)
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(PdfSegmentationServiceTest.TestConfiguration.class)
public class PdfSegmentationServiceTest {

View File

@ -1,24 +1,24 @@
package com.iqser.red.service.redaction.v1.server.stringmatching;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
import org.ahocorasick.trie.Trie;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import org.ahocorasick.trie.Trie;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
@RunWith(SpringRunner.class)
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
public class StringMatchingPerformanceTest {
@Test
@ -34,7 +34,7 @@ public class StringMatchingPerformanceTest {
System.out.println("Loaded text has a length of " + text.length() + " symbols");
System.out.println("Dictionary has " + dictionary.size() + " entries");
var patterns = dictionary.stream().map(p -> Pattern.compile(Pattern.quote(p))).collect(Collectors.toList());
var patterns = dictionary.stream().map(p -> Pattern.compile(Pattern.quote(p))).toList();
var trie = Trie.builder().ignoreCase().addKeywords(dictionary).build();
// 1. Naive approach
@ -56,7 +56,7 @@ public class StringMatchingPerformanceTest {
t1 = System.currentTimeMillis();
var boyerMooreIndexes = new HashSet<Index>();
for (var pattern : patterns) {
boyerMooreIndexes.addAll(pattern.matcher(text).results().map(r -> new Index(r.start(), r.end())).collect(Collectors.toList()));
boyerMooreIndexes.addAll(pattern.matcher(text).results().map(r -> new Index(r.start(), r.end())).toList());
}
t2 = System.currentTimeMillis();
System.out.println("Boyer Moore found " + boyerMooreIndexes.size() + " entries in " + (t2 - t1) + "ms");