Merge branch 'AZURE_NER' into 'main'
RED-9918: Azure entity recognition (Spike). See merge request fforesight/layout-parser!196.
This commit is contained in:
commit
b2fa14dde2
@ -21,5 +21,14 @@ public class SimplifiedText {
|
||||
@Schema(description = "A List of simplified Sections, which contains almost exclusively text.")
|
||||
@Builder.Default
|
||||
private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
|
||||
@Schema(description = "A list of the main section numbers ")
|
||||
@Builder.Default
|
||||
private List<String> mainSectionNumbers = new ArrayList<>();
|
||||
@Schema(description = "A list of the header section numbers ")
|
||||
@Builder.Default
|
||||
private List<String> headerSectionNumbers = new ArrayList<>();
|
||||
@Schema(description = "A list of the footer section numbers ")
|
||||
@Builder.Default
|
||||
private List<String> footerSectionNumbers = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
@ -34,7 +34,22 @@ public class SimplifiedSectionTextService {
|
||||
List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList());
|
||||
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedText).build();
|
||||
return SimplifiedText.builder()
|
||||
.numberOfPages(document.getNumberOfPages())
|
||||
.sectionTexts(simplifiedText)
|
||||
.mainSectionNumbers(document.getAllSections()
|
||||
.stream()
|
||||
.map(this::getSectionNumber)
|
||||
.toList())
|
||||
.headerSectionNumbers(document.getHeaders()
|
||||
.stream()
|
||||
.map(this::getSectionNumber)
|
||||
.toList())
|
||||
.footerSectionNumbers(document.getFooters()
|
||||
.stream()
|
||||
.map(this::getSectionNumber)
|
||||
.toList())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -49,4 +64,13 @@ public class SimplifiedSectionTextService {
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private String getSectionNumber(SemanticNode semanticNode) {
|
||||
|
||||
return semanticNode.getTreeId()
|
||||
.stream()
|
||||
.map(String::valueOf)
|
||||
.collect(Collectors.joining("."));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user