Merge branch 'main' into RED-8701

This commit is contained in:
Corina Olariu 2024-04-23 11:46:59 +03:00
commit 6a86036a78
3 changed files with 78 additions and 9 deletions

View File

@ -44,7 +44,6 @@ public class SectionsBuilderService {
for (ClassificationPage page : document.getPages()) {
List<TextPageBlock> header = new ArrayList<>();
List<TextPageBlock> footer = new ArrayList<>();
List<TextPageBlock> unclassifiedText = new ArrayList<>();
for (AbstractPageBlock current : page.getTextBlocks()) {
if (current.getClassification() == null) {
@ -63,11 +62,6 @@ public class SectionsBuilderService {
continue;
}
if (current.getClassification().equals(PageBlockType.OTHER)) {
unclassifiedText.add((TextPageBlock) current);
continue;
}
if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) {
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
chunkBlock.setHeadline(lastHeadline);
@ -95,9 +89,6 @@ public class SectionsBuilderService {
if (!footer.isEmpty()) {
footers.add(new ClassificationFooter(footer));
}
if (!unclassifiedText.isEmpty()) {
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
}
}
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);

View File

@ -0,0 +1,78 @@
package com.knecon.fforesight.service.layoutparser.server;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
import io.micrometer.observation.Observation;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * Verifies that the simplified text produced from a parsed document contains the
 * header and footer texts of the sample PDF as individual section texts.
 *
 * <p>Collaborators are injected via {@code @Autowired}; the test setup itself is
 * inherited from {@link AbstractTest}.
 */
@Slf4j
public class SimplifiedTextServiceTest extends AbstractTest {

    @Autowired
    private LayoutParsingPipeline layoutParsingPipeline;

    @Autowired
    private SimplifiedSectionTextService simplifiedSectionTextService;


    /**
     * Parses {@code files/headerFooterTest3Pages.pdf}, converts the resulting document graph
     * to its simplified text and asserts that at least one section text equals the expected
     * footer text and at least one equals the expected header text.
     */
    @Test
    @SneakyThrows
    public void testSearchIndexing() {

        File file = new ClassPathResource("files/headerFooterTest3Pages.pdf").getFile();
        // The trailing space is part of the expected text: sections are matched with equals().
        String footerExample = "Footer to search for ";
        String headerExample = "Header to search for ";

        Document document = buildGraph(file);
        SimplifiedText simplifiedText = simplifiedSectionTextService.toSimplifiedText(document);
        List<SimplifiedSectionText> sectionTexts = simplifiedText.getSectionTexts();

        assertThat(containsSectionWithText(sectionTexts, footerExample))
                .as("a section with the exact footer text '%s' exists", footerExample)
                .isTrue();
        assertThat(containsSectionWithText(sectionTexts, headerExample))
                .as("a section with the exact header text '%s' exists", headerExample)
                .isTrue();
    }


    /**
     * Tells whether any of the given sections has a text equal to {@code expectedText}.
     *
     * @param sectionTexts the sections to search
     * @param expectedText the exact text a section must have to match
     * @return {@code true} if at least one section text equals {@code expectedText}
     */
    private static boolean containsSectionWithText(List<SimplifiedSectionText> sectionTexts, String expectedText) {

        // anyMatch stops at the first hit and avoids collecting an intermediate list.
        return sectionTexts.stream().anyMatch(section -> section.getText().equals(expectedText));
    }


    /**
     * Runs the layout parsing pipeline on the given file and builds the document graph from
     * the result, using {@link LayoutParsingType#REDACT_MANAGER} for both steps.
     *
     * <p>Freshly constructed (default) image, table and visual layout responses are passed
     * to the pipeline.
     *
     * @param file the file to parse
     * @return the document graph built from the parsed layout
     */
    @SneakyThrows
    protected Document buildGraph(File file) {

        return DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER,
                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
                        file,
                        new ImageServiceResponse(),
                        new TableServiceResponse(),
                        new VisualLayoutParsingResponse(),
                        // NOTE(review): presumably identifying metadata for the parse run - confirm against parseLayout.
                        Map.of("file", file.toString())));
    }

}