diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java index c98c688..fec1b29 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/SectionsBuilderService.java @@ -44,7 +44,6 @@ public class SectionsBuilderService { for (ClassificationPage page : document.getPages()) { List header = new ArrayList<>(); List footer = new ArrayList<>(); - List unclassifiedText = new ArrayList<>(); for (AbstractPageBlock current : page.getTextBlocks()) { if (current.getClassification() == null) { @@ -63,11 +62,6 @@ public class SectionsBuilderService { continue; } - if (current.getClassification().equals(PageBlockType.OTHER)) { - unclassifiedText.add((TextPageBlock) current); - continue; - } - if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) { ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline); chunkBlock.setHeadline(lastHeadline); @@ -95,9 +89,6 @@ public class SectionsBuilderService { if (!footer.isEmpty()) { footers.add(new ClassificationFooter(footer)); } - if (!unclassifiedText.isEmpty()) { - unclassifiedTexts.add(new UnclassifiedText(unclassifiedText)); - } } ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/SimplifiedTextServiceTest.java 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/SimplifiedTextServiceTest.java
new file mode 100644
index 0000000..d2dfe41
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/SimplifiedTextServiceTest.java
@@ -0,0 +1,97 @@
+package com.knecon.fforesight.service.layoutparser.server;
+
+import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.io.ClassPathResource;
+
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
+import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
+import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
+import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
+import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
+import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
+import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
+import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
+import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
+import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
+import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
+
+import io.micrometer.observation.Observation;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Checks that the header and footer example texts of the fixture PDF show up as section texts
+ * in the {@link SimplifiedText} built by {@link SimplifiedSectionTextService}.
+ */
+@Slf4j
+public class SimplifiedTextServiceTest extends AbstractTest {
+
+    private static final String TEST_FILE = "files/headerFooterTest3Pages.pdf";
+    private static final String FOOTER_EXAMPLE = "Footer to search for ";
+    private static final String HEADER_EXAMPLE = "Header to search for ";
+
+    @Autowired
+    private LayoutParsingPipeline layoutParsingPipeline;
+    @Autowired
+    private SimplifiedSectionTextService simplifiedSectionTextService;
+
+
+    /**
+     * Parses the fixture PDF and asserts that at least one section text equals the footer
+     * example and at least one equals the header example (exact match, including the
+     * trailing space of each example string).
+     */
+    @Test
+    @SneakyThrows
+    public void testSearchIndexing() {
+
+        File file = new ClassPathResource(TEST_FILE).getFile();
+        Document document = buildGraph(file);
+        SimplifiedText simplifiedText = simplifiedSectionTextService.toSimplifiedText(document);
+        // Typed list: with a raw List the lambda parameter is Object and getText() does not resolve.
+        List<SimplifiedSectionText> sectionTexts = simplifiedText.getSectionTexts();
+
+        assertThat(sectionTexts.stream().anyMatch(section -> FOOTER_EXAMPLE.equals(section.getText())))
+                .as("a section text equal to the footer example")
+                .isTrue();
+        assertThat(sectionTexts.stream().anyMatch(section -> HEADER_EXAMPLE.equals(section.getText())))
+                .as("a section text equal to the header example")
+                .isTrue();
+    }
+
+
+    /**
+     * Runs the layout parsing pipeline on the given file with newly constructed image, table
+     * and visual layout responses and builds the document graph from the result.
+     *
+     * @param file the PDF to parse
+     * @return the document graph built for {@link LayoutParsingType#REDACT_MANAGER}
+     */
+    @SneakyThrows
+    protected Document buildGraph(File file) {
+
+        return DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER,
+                layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
+                        file,
+                        new ImageServiceResponse(),
+                        new TableServiceResponse(),
+                        new VisualLayoutParsingResponse(),
+                        Map.of("file", file.toString())));
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/headerFooterTest3Pages.pdf
b/layoutparser-service/layoutparser-service-server/src/test/resources/files/headerFooterTest3Pages.pdf new file mode 100644 index 0000000..f80c5b7 Binary files /dev/null and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/headerFooterTest3Pages.pdf differ