Merge branch 'main' into RED-8701
This commit is contained in:
commit
6a86036a78
@ -44,7 +44,6 @@ public class SectionsBuilderService {
|
||||
for (ClassificationPage page : document.getPages()) {
|
||||
List<TextPageBlock> header = new ArrayList<>();
|
||||
List<TextPageBlock> footer = new ArrayList<>();
|
||||
List<TextPageBlock> unclassifiedText = new ArrayList<>();
|
||||
for (AbstractPageBlock current : page.getTextBlocks()) {
|
||||
|
||||
if (current.getClassification() == null) {
|
||||
@ -63,11 +62,6 @@ public class SectionsBuilderService {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current.getClassification().equals(PageBlockType.OTHER)) {
|
||||
unclassifiedText.add((TextPageBlock) current);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) {
|
||||
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
@ -95,9 +89,6 @@ public class SectionsBuilderService {
|
||||
if (!footer.isEmpty()) {
|
||||
footers.add(new ClassificationFooter(footer));
|
||||
}
|
||||
if (!unclassifiedText.isEmpty()) {
|
||||
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
|
||||
}
|
||||
}
|
||||
|
||||
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
|
||||
@ -0,0 +1,78 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
|
||||
import io.micrometer.observation.Observation;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class SimplifiedTextServiceTest
|
||||
extends AbstractTest {
|
||||
|
||||
@Autowired
|
||||
private LayoutParsingPipeline layoutParsingPipeline;
|
||||
@Autowired
|
||||
private SimplifiedSectionTextService simplifiedSectionTextService;
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testSearchIndexing() {
|
||||
|
||||
File file = new ClassPathResource("files/headerFooterTest3Pages.pdf").getFile();
|
||||
String footerExample = "Footer to search for ";
|
||||
String headerExample ="Header to search for ";
|
||||
Document document = buildGraph(file);
|
||||
SimplifiedText simplifiedText = simplifiedSectionTextService.toSimplifiedText(document);
|
||||
List<SimplifiedSectionText> sectionTexts = simplifiedText.getSectionTexts();
|
||||
assertThat(sectionTexts.stream().filter(section -> section.getText().equals(footerExample)).collect(Collectors.toList()).size()).isGreaterThan(0);
|
||||
assertThat(sectionTexts.stream().filter(section -> section.getText().equals(headerExample)).collect(Collectors.toList()).size()).isGreaterThan(0);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
protected Document buildGraph(File file) {
|
||||
|
||||
return DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.REDACT_MANAGER,
|
||||
layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
file,
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
Map.of("file",file.toString())));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user