Merge branch 'AZURE_NER' into 'main'
RED-9918: Azure entity recognition (Spike). See merge request fforesight/layout-parser!196
This commit is contained in:
commit
b2fa14dde2
@ -21,5 +21,14 @@ public class SimplifiedText {
|
|||||||
@Schema(description = "A List of simplified Sections, which contains almost exclusively text.")
|
@Schema(description = "A List of simplified Sections, which contains almost exclusively text.")
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
|
private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
|
||||||
|
@Schema(description = "A list of the main section numbers ")
|
||||||
|
@Builder.Default
|
||||||
|
private List<String> mainSectionNumbers = new ArrayList<>();
|
||||||
|
@Schema(description = "A list of the header section numbers ")
|
||||||
|
@Builder.Default
|
||||||
|
private List<String> headerSectionNumbers = new ArrayList<>();
|
||||||
|
@Schema(description = "A list of the footer section numbers ")
|
||||||
|
@Builder.Default
|
||||||
|
private List<String> footerSectionNumbers = new ArrayList<>();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -34,7 +34,22 @@ public class SimplifiedSectionTextService {
|
|||||||
List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
|
List<SimplifiedSectionText> simplifiedText = Stream.of(simplifiedMainSectionsList, simplifiedHeadersList, simplifiedFootersList)
|
||||||
.flatMap(List::stream)
|
.flatMap(List::stream)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedText).build();
|
return SimplifiedText.builder()
|
||||||
|
.numberOfPages(document.getNumberOfPages())
|
||||||
|
.sectionTexts(simplifiedText)
|
||||||
|
.mainSectionNumbers(document.getAllSections()
|
||||||
|
.stream()
|
||||||
|
.map(this::getSectionNumber)
|
||||||
|
.toList())
|
||||||
|
.headerSectionNumbers(document.getHeaders()
|
||||||
|
.stream()
|
||||||
|
.map(this::getSectionNumber)
|
||||||
|
.toList())
|
||||||
|
.footerSectionNumbers(document.getFooters()
|
||||||
|
.stream()
|
||||||
|
.map(this::getSectionNumber)
|
||||||
|
.toList())
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -49,4 +64,13 @@ public class SimplifiedSectionTextService {
|
|||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private String getSectionNumber(SemanticNode semanticNode) {
|
||||||
|
|
||||||
|
return semanticNode.getTreeId()
|
||||||
|
.stream()
|
||||||
|
.map(String::valueOf)
|
||||||
|
.collect(Collectors.joining("."));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user