RED-9374: Ner Entities are at wrong locations
This commit is contained in:
parent
36c7a61265
commit
7918db25a9
@ -12,7 +12,7 @@ plugins {
|
||||
description = "redaction-service-server-v1"
|
||||
|
||||
|
||||
val layoutParserVersion = "0.139.0"
|
||||
val layoutParserVersion = "0.141.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "9.44.0.Final"
|
||||
val pdfBoxVersion = "3.0.0"
|
||||
|
||||
@ -11,7 +11,7 @@ import lombok.NoArgsConstructor;
|
||||
@NoArgsConstructor
|
||||
public class EntityRecognitionSection {
|
||||
|
||||
private int sectionNumber;
|
||||
private String sectionNumber;
|
||||
private String text;
|
||||
|
||||
}
|
||||
|
||||
@ -13,6 +13,6 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class NerEntitiesModel {
|
||||
|
||||
private Map<Integer, List<EntityRecognitionEntity>> data = new HashMap<>();
|
||||
private Map<String, List<EntityRecognitionEntity>> data = new HashMap<>();
|
||||
|
||||
}
|
||||
|
||||
@ -1,10 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
@ -14,7 +17,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
@ -44,11 +46,10 @@ public class NerEntitiesAdapter {
|
||||
*/
|
||||
public NerEntities toNerEntities(NerEntitiesModel nerEntitiesModel, Document document) {
|
||||
|
||||
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSectionsHeadersFooters(document),
|
||||
nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(nerEntityModel.getValue(),
|
||||
new TextRange(nerEntityModel.getStartOffset(),
|
||||
nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType()))
|
||||
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSectionsHeadersFooters(document), nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(
|
||||
nerEntityModel.getValue(),
|
||||
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType()))
|
||||
.toList());
|
||||
}
|
||||
|
||||
@ -161,11 +162,12 @@ public class NerEntitiesAdapter {
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntityRecognitionEntity> addOffsetsAndFlatten(List<Integer> stringOffsetsForMainSectionsHeadersFooters, NerEntitiesModel nerEntitiesModel) {
|
||||
private static Stream<EntityRecognitionEntity> addOffsetsAndFlatten(Map<List<Integer>, Integer> stringOffsetsForMainSectionsHeadersFooters, NerEntitiesModel nerEntitiesModel) {
|
||||
|
||||
nerEntitiesModel.getData()
|
||||
.forEach((sectionNumber, listOfNerEntities) -> listOfNerEntities.forEach(entityRecognitionEntity -> {
|
||||
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSectionsHeadersFooters.get(sectionNumber);
|
||||
int newStartOffset = entityRecognitionEntity.getStartOffset() + stringOffsetsForMainSectionsHeadersFooters.getOrDefault(sectionNumberToTreeId(sectionNumber),
|
||||
0);
|
||||
entityRecognitionEntity.setStartOffset(newStartOffset);
|
||||
entityRecognitionEntity.setEndOffset(newStartOffset + entityRecognitionEntity.getValue().length());
|
||||
}));
|
||||
@ -175,14 +177,19 @@ public class NerEntitiesAdapter {
|
||||
}
|
||||
|
||||
|
||||
private static List<Integer> getStringStartOffsetsForMainSectionsHeadersFooters(Document document) {
|
||||
private static List<Integer> sectionNumberToTreeId(String sectionNumber) {
|
||||
|
||||
return Arrays.stream(sectionNumber.split("\\."))
|
||||
.map(Integer::parseInt)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Map<List<Integer>, Integer> getStringStartOffsetsForMainSectionsHeadersFooters(Document document) {
|
||||
|
||||
return document.streamAllSubNodes()
|
||||
.filter(child -> (child.getType().equals(NodeType.FOOTER) ||child.getType().equals(NodeType.HEADER) ||child.getType().equals(NodeType.SECTION)))
|
||||
.map(SemanticNode::getTextBlock)
|
||||
.map(TextBlock::getTextRange)
|
||||
.map(TextRange::start)
|
||||
.toList();
|
||||
.filter(child -> child.getType().equals(NodeType.FOOTER) || child.getType().equals(NodeType.HEADER) || child.getType().equals(NodeType.SECTION))
|
||||
.collect(Collectors.toMap(SemanticNode::getTreeId, child -> child.getTextBlock().getTextRange().start()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -308,7 +308,7 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", textEntity.getTextBefore());
|
||||
assertEquals(" of metabolite of", textEntity.getTextAfter());
|
||||
assertEquals(searchTerm, textEntity.getValue());
|
||||
assertEquals(7, textEntity.getIntersectingNodes().size());
|
||||
assertEquals(8, textEntity.getIntersectingNodes().size());
|
||||
assertEquals("Table 2.7-1: List of substances and metabolites and related structural formula ",
|
||||
textEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText());
|
||||
assertTrue(textEntity.getPages()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user