diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 595817c6..2e900250 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,7 +12,7 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.141.0" +val layoutParserVersion = "0.160.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" @@ -91,6 +91,7 @@ dependencies { module = "persistence-service-shared-api-v1" ) } + testImplementation("com.pdftron:PDFNet:10.11.0") } dependencyManagement { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java index 630d85f7..c887000b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AbstractRedactionIntegrationTest.java @@ -75,6 +75,7 @@ import com.mongodb.MongoCommandException; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClients; import com.mongodb.client.MongoDatabase; +import com.pdftron.pdf.PDFNet; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -228,6 +229,8 @@ public abstract class AbstractRedactionIntegrationTest { when(tenantProvider.getTenant(any())).thenReturn(builder().tenantId("redaction").mongoDBConnection(mongoDBConnection).build()); tenantMongoLiquibaseExecutor.initializeTenant("redaction"); + + PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a"); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java index 6a6a4330..c53a5eb3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -26,7 +26,7 @@ import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichme import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility; import com.knecon.fforesight.service.viewerdoc.model.Visualizations; -import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService; +import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService; import com.knecon.fforesight.tenantcommons.TenantContext; import lombok.SneakyThrows; @@ -89,17 +89,11 @@ public class TableTest extends BuildDocumentIntegrationTest { storageService.downloadTo(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT), file); - ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); + PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null); - var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA); + var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA); - viewerDocumentService.addVisualizationsOnPage(file, - file, - List.of(Visualizations.builder() - .layer(ENTITY_LAYER) - .visualizationsOnPages(visualizationsOnPage) - .layerVisibilityDefaultValue(true) - .build())); + viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup)); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java index 4d00e15d..6bede2ee 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/adapter/NerEntitiesAdapterTest.java @@ -7,10 +7,12 @@ import java.awt.Color; import java.awt.geom.Rectangle2D; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -176,36 +178,42 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest { private List validateAndCombine(NerEntities nerEntities, Document document) { - List cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author") - .toList(); - Stream cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) - .map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address")); - Stream azureCbiAddress = NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities, - Set.of("Organization", - "Location", - "Address", - "ORG", - "STREET", - "CITY"), - Set.of("Organization", - "Location", - "Address", - "Quantity", - "ORG", - "STREET", - "POSTAL", - "COUNTRY", - "CARDINAL", - "CITY", - "STATE"), - 50, - 3, - 2, - 0.7) - .map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address")); + List combinedEntities = new ArrayList<>(); - return Stream.concat(cbiAuthors.stream(), Stream.concat(cbiAddress, azureCbiAddress)) - .toList(); + combinedEntities.addAll(nerEntities.streamEntitiesOfType("CBI_author") + .toList()); + combinedEntities.addAll(nerEntities.streamEntitiesOfType("Person") + .toList()); + combinedEntities.addAll(nerEntities.streamEntitiesOfType("Address") + .toList()); + combinedEntities.addAll(nerEntities.streamEntitiesOfType("Location") + .toList()); + + NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) + .map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address")) + .forEach(combinedEntities::add); + + NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities, + Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"), + Set.of("Organization", + "Location", + "Address", + "Quantity", + "ORG", + "STREET", + "POSTAL", + "COUNTRY", + "CARDINAL", + "CITY", + "STATE"), + 50, + 3, + 2, + 0.7) + .map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address")) + .forEach(combinedEntities::add); + + return combinedEntities; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityLayerGroup.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityLayerGroup.java new file mode 100644 index 00000000..f4e3cce6 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityLayerGroup.java @@ -0,0 +1,38 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.util.List; + +import com.knecon.fforesight.service.viewerdoc.LayerIdentifier; +import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup; +import com.knecon.fforesight.service.viewerdoc.model.Visualizations; + +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class EntityLayerGroup implements LayerGroup { + + public static final LayerIdentifier ENTITY_LAYER = new LayerIdentifier("Entities", "ENTITIES"); + + private final List visualizations; + + @Override + public LayerIdentifier getGroupIdentifier() { + + return ENTITY_LAYER; + } + + + @Override + public List getVisualizations() { + + return visualizations; + } + + + @Override + public boolean subLayersAreOptionalContent() { + + return false; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java index 7fae90bc..abe5d49b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -8,13 +8,12 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import org.apache.pdfbox.cos.COSName; - import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; -import com.knecon.fforesight.service.viewerdoc.ContentStreams; +import com.knecon.fforesight.service.viewerdoc.LayerIdentifier; import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle; +import com.knecon.fforesight.service.viewerdoc.model.Visualizations; import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage; import lombok.experimental.UtilityClass; @@ -22,7 +21,7 @@ import lombok.experimental.UtilityClass; @UtilityClass public class EntityVisualizationUtility { - public static final ContentStreams.Identifier ENTITY_LAYER = new ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true); + public static final LayerIdentifier ENTITY_LAYER = new LayerIdentifier("Entities", "KNECON_ENTITIES"); public Map createVisualizationsOnPage(Collection entity, Color color) { @@ -58,4 +57,13 @@ public class EntityVisualizationUtility { .toList(); } + + public EntityLayerGroup createEntityLayerGroup(Collection entity, Color color) { + + return new EntityLayerGroup(List.of(Visualizations.builder() + .layer(EntityLayerGroup.ENTITY_LAYER) + .visualizationsOnPages(createVisualizationsOnPage(entity, color)) + .build())); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index 91af7e55..aa02d0f5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -37,6 +37,7 @@ public class LayoutParsingRequestProvider { .simplifiedTextStorageId(simplifiedTextStorageId) .viewerDocumentStorageId(viewerDocumentStorageId) .visualLayoutParsingFileId(Optional.empty()) + .documentMarkdownFileStorageId(Optional.empty()) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.AZURE_NER_ENTITIES.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.AZURE_NER_ENTITIES.json index 592a87a0..1d9a7cef 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.AZURE_NER_ENTITIES.json +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.AZURE_NER_ENTITIES.json @@ -29,13 +29,6 @@ "type": "PersonType", "confidence": 0.85 }, - { - "value": "manufacturers", - "startOffset": 163, - "endOffset": 176, - "type": "PersonType", - "confidence": 0.5 - }, { "value": "person", "startOffset": 232, @@ -57,13 +50,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "to 36", - "startOffset": 422, - "endOffset": 427, - "type": "Quantity", - "confidence": 0.52 - }, { "value": "Manufacturing process", "startOffset": 428, @@ -400,13 +386,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Sales data", - "startOffset": 1478, - "endOffset": 1488, - "type": "Skill", - "confidence": 0.58 - }, { "value": "sales", "startOffset": 1498, @@ -428,13 +407,6 @@ "type": "PersonType", "confidence": 0.7 }, - { - "value": "internal study", - "startOffset": 1673, - "endOffset": 1687, - "type": "Skill", - "confidence": 0.5 - }, { "value": "reports", "startOffset": 1779, @@ -456,6 +428,13 @@ "type": "PersonType", "confidence": 0.95 }, + { + "value": "applicants", + "startOffset": 1964, + "endOffset": 1974, + "type": "PersonType", + "confidence": 0.88 + }, { "value": "data", "startOffset": 2001, @@ -535,13 +514,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "applicants", - "startOffset": 392, - "endOffset": 402, - "type": "PersonType", - "confidence": 0.52 - }, { "value": "EFSA", "startOffset": 433, @@ -549,13 +521,6 @@ "type": "Organization", "confidence": 0.97 }, - { - "value": "additives", - "startOffset": 477, - "endOffset": 486, - "type": "Product", - "confidence": 0.5 - }, { "value": "2.2.9.5", "startOffset": 628, @@ -1039,6 +1004,13 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "Analytical", + "startOffset": 2220, + "endOffset": 2230, + "type": "Skill", + "confidence": 0.87 + }, { "value": "3.3", "startOffset": 2289, @@ -1074,6 +1046,41 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "Compliance", + "startOffset": 2345, + "endOffset": 2355, + "type": "Skill", + "confidence": 0.64 + }, + { + "value": "4.9", + "startOffset": 2412, + "endOffset": 2415, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "2", + "startOffset": 2338, + "endOffset": 2339, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "3", + "startOffset": 2340, + "endOffset": 2341, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "10", + "startOffset": 2342, + "endOffset": 2344, + "type": "Quantity", + "confidence": 0.8 + }, { "value": "4.9", "startOffset": 2412, @@ -1158,20 +1165,6 @@ "type": "Organization", "confidence": 0.88 }, - { - "value": "Food Additives", - "startOffset": 2698, - "endOffset": 2712, - "type": "Product", - "confidence": 0.56 - }, - { - "value": "Commission", - "startOffset": 2738, - "endOffset": 2748, - "type": "Organization", - "confidence": 0.5 - }, { "value": "234/2011", "startOffset": 2768, @@ -1305,13 +1298,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "–", - "startOffset": 3278, - "endOffset": 3279, - "type": "Quantity", - "confidence": 0.51 - }, { "value": "33", "startOffset": 3279, @@ -1540,13 +1526,6 @@ "type": "Product", "confidence": 0.62 }, - { - "value": "food", - "startOffset": 505, - "endOffset": 509, - "type": "Product", - "confidence": 0.51 - }, { "value": "food additive", "startOffset": 602, @@ -1652,13 +1631,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "–", - "startOffset": 971, - "endOffset": 972, - "type": "Quantity", - "confidence": 0.51 - }, { "value": "33", "startOffset": 972, @@ -1819,13 +1791,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Technical Dossier", - "startOffset": 461, - "endOffset": 478, - "type": "Skill", - "confidence": 0.51 - }, { "value": "1331/20083", "startOffset": 593, @@ -1840,13 +1805,6 @@ "type": "Skill", "confidence": 0.76 }, - { - "value": "assessment", - "startOffset": 657, - "endOffset": 667, - "type": "Skill", - "confidence": 0.58 - }, { "value": "Data", "startOffset": 705, @@ -1854,13 +1812,6 @@ "type": "Skill", "confidence": 0.66 }, - { - "value": "application", - "startOffset": 727, - "endOffset": 738, - "type": "Skill", - "confidence": 0.59 - }, { "value": "234/20114", "startOffset": 796, @@ -1882,13 +1833,6 @@ "type": "Skill", "confidence": 0.96 }, - { - "value": "food", - "startOffset": 906, - "endOffset": 910, - "type": "Product", - "confidence": 0.5 - }, { "value": "EFSA", "startOffset": 942, @@ -2001,13 +1945,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "applicants", - "startOffset": 1602, - "endOffset": 1612, - "type": "PersonType", - "confidence": 0.59 - }, { "value": "EFSA", "startOffset": 1788, @@ -2176,13 +2113,6 @@ "type": "Product", "confidence": 0.62 }, - { - "value": "food flavourings", - "startOffset": 2340, - "endOffset": 2356, - "type": "Product", - "confidence": 0.54 - }, { "value": "64", "startOffset": 2363, @@ -2204,13 +2134,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "24.", - "startOffset": 2384, - "endOffset": 2387, - "type": "Quantity", - "confidence": 0.55 - }, { "value": "http://eur-lex.europa.eu/legalcontent/EN/ALL/?uri=CELEX:32011R0234", "startOffset": 2406, @@ -2276,13 +2199,6 @@ "type": "Product", "confidence": 0.98 }, - { - "value": "food additive", - "startOffset": 95, - "endOffset": 108, - "type": "Product", - "confidence": 0.53 - }, { "value": "European Union", "startOffset": 116, @@ -2320,13 +2236,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Application", - "startOffset": 49, - "endOffset": 60, - "type": "Skill", - "confidence": 0.54 - }, { "value": "support", "startOffset": 95, @@ -2394,13 +2303,6 @@ "type": "Organization", "confidence": 0.91 }, - { - "value": "ANS Panel", - "startOffset": 194, - "endOffset": 203, - "type": "Organization", - "confidence": 0.58 - }, { "value": "2021", "startOffset": 205, @@ -2434,14 +2336,21 @@ "startOffset": 472, "endOffset": 486, "type": "Product", - "confidence": 0.6 + "confidence": 0.89 }, { - "value": "enzymes", - "startOffset": 493, + "value": "food enzymes", + "startOffset": 488, "endOffset": 500, "type": "Product", - "confidence": 0.49 + "confidence": 0.8 + }, + { + "value": "food flavourings", + "startOffset": 505, + "endOffset": 521, + "type": "Product", + "confidence": 0.68 }, { "value": "1", @@ -2499,13 +2408,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "applicant", - "startOffset": 799, - "endOffset": 808, - "type": "PersonType", - "confidence": 0.53 - }, { "value": "1", "startOffset": 809, @@ -2534,13 +2436,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "manufacture", - "startOffset": 860, - "endOffset": 871, - "type": "Skill", - "confidence": 0.89 - }, { "value": "1", "startOffset": 872, @@ -2632,19 +2527,19 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "application", + "startOffset": 1018, + "endOffset": 1029, + "type": "Skill", + "confidence": 0.76 + }, { "value": "food additive", "startOffset": 1051, "endOffset": 1064, "type": "Product", - "confidence": 0.78 - }, - { - "value": "modification", - "startOffset": 1066, - "endOffset": 1078, - "type": "Event", - "confidence": 0.54 + "confidence": 0.85 }, { "value": "1", @@ -2961,13 +2856,6 @@ "type": "Skill", "confidence": 0.68 }, - { - "value": "characteristics", - "startOffset": 2281, - "endOffset": 2296, - "type": "Skill", - "confidence": 0.5 - }, { "value": "2", "startOffset": 2297, @@ -3192,13 +3080,6 @@ "type": "IPAddress", "confidence": 0.8 }, - { - "value": "Toxicokinetics", - "startOffset": 2974, - "endOffset": 2988, - "type": "Skill", - "confidence": 0.57 - }, { "value": "3.9", "startOffset": 2989, @@ -3724,13 +3605,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Specifications", - "startOffset": 934, - "endOffset": 948, - "type": "Skill", - "confidence": 0.55 - }, { "value": "Batch Analysis", "startOffset": 953, @@ -3801,13 +3675,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Contaminants", - "startOffset": 1115, - "endOffset": 1127, - "type": "Skill", - "confidence": 0.53 - }, { "value": "3", "startOffset": 1128, @@ -4382,13 +4249,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Analytical", - "startOffset": 3117, - "endOffset": 3127, - "type": "Skill", - "confidence": 0.56 - }, { "value": "4.9", "startOffset": 3223, @@ -4480,13 +4340,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cagate", - "startOffset": 3626, - "endOffset": 3632, - "type": "Skill", - "confidence": 0.5 - }, { "value": "2", "startOffset": 3641, @@ -4613,13 +4466,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "EU", - "startOffset": 4044, - "endOffset": 4046, - "type": "Organization", - "confidence": 0.49 - }, { "value": "234/2011", "startOffset": 4051, @@ -4646,21 +4492,14 @@ "startOffset": 4161, "endOffset": 4165, "type": "Organization", - "confidence": 0.93 - }, - { - "value": "ANS", - "startOffset": 4166, - "endOffset": 4169, - "type": "Organization", - "confidence": 0.47 + "confidence": 0.95 }, { "value": "2021", "startOffset": 4177, "endOffset": 4181, "type": "DateTime", - "confidence": 0.97 + "confidence": 0.98 }, { "value": "1", @@ -4732,6 +4571,48 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "2", + "startOffset": 4295, + "endOffset": 4296, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "2", + "startOffset": 4297, + "endOffset": 4298, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "1", + "startOffset": 4299, + "endOffset": 4300, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "1", + "startOffset": 4301, + "endOffset": 4302, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "2.2.2.1", + "startOffset": 4347, + "endOffset": 4354, + "type": "IPAddress", + "confidence": 0.8 + }, + { + "value": "1", + "startOffset": 4355, + "endOffset": 4356, + "type": "Quantity", + "confidence": 0.8 + }, { "value": "2.2.2.1", "startOffset": 4403, @@ -4746,13 +4627,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cassia Gum", - "startOffset": 4427, - "endOffset": 4437, - "type": "Product", - "confidence": 0.73 - }, { "value": "2", "startOffset": 4459, @@ -4781,13 +4655,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cassia Gum", - "startOffset": 4486, - "endOffset": 4496, - "type": "Product", - "confidence": 0.96 - }, { "value": "2", "startOffset": 4503, @@ -4816,13 +4683,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cassia Gum", - "startOffset": 4530, - "endOffset": 4540, - "type": "Product", - "confidence": 0.86 - }, { "value": "3", "startOffset": 4547, @@ -4852,11 +4712,11 @@ "confidence": 0.8 }, { - "value": "Batch Analysis", - "startOffset": 4555, + "value": "Analysis", + "startOffset": 4561, "endOffset": 4569, "type": "Skill", - "confidence": 0.94 + "confidence": 0.69 }, { "value": "16", @@ -4865,13 +4725,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cassia", - "startOffset": 4588, - "endOffset": 4594, - "type": "Product", - "confidence": 0.63 - }, { "value": "3.1.2.1", "startOffset": 4605, @@ -4887,11 +4740,11 @@ "confidence": 0.8 }, { - "value": "Heavy Metal Analysis", + "value": "Heavy Metal", "startOffset": 4615, - "endOffset": 4635, + "endOffset": 4626, "type": "Skill", - "confidence": 0.89 + "confidence": 0.64 }, { "value": "21", @@ -4900,13 +4753,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Cassia", - "startOffset": 4654, - "endOffset": 4660, - "type": "Product", - "confidence": 0.74 - }, { "value": "3.1.2.2", "startOffset": 4671, @@ -4921,20 +4767,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Analysis", - "startOffset": 4681, - "endOffset": 4689, - "type": "Skill", - "confidence": 0.6 - }, - { - "value": "Residual", - "startOffset": 4694, - "endOffset": 4702, - "type": "Skill", - "confidence": 0.52 - }, { "value": "16", "startOffset": 4715, @@ -4956,13 +4788,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "Microbial Analysis", - "startOffset": 4756, - "endOffset": 4774, - "type": "Skill", - "confidence": 0.97 - }, { "value": "16", "startOffset": 4779, @@ -5124,20 +4949,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": ".8%", - "startOffset": 5189, - "endOffset": 5192, - "type": "Quantity", - "confidence": 0.89 - }, - { - "value": "168", - "startOffset": 5209, - "endOffset": 5212, - "type": "Quantity", - "confidence": 0.8 - }, { "value": "3", "startOffset": 5239, @@ -5194,13 +5005,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "European Union", - "startOffset": 5464, - "endOffset": 5478, - "type": "Organization", - "confidence": 0.69 - }, { "value": "3", "startOffset": 5485, @@ -5229,6 +5033,13 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "FoodEx2", + "startOffset": 5632, + "endOffset": 5639, + "type": "Organization", + "confidence": 0.81 + }, { "value": "3.9", "startOffset": 5673, @@ -5271,6 +5082,20 @@ "type": "Quantity", "confidence": 0.8 }, + { + "value": "Cassia Gum", + "startOffset": 5807, + "endOffset": 5817, + "type": "Product", + "confidence": 0.94 + }, + { + "value": "European Union", + "startOffset": 5825, + "endOffset": 5839, + "type": "Organization", + "confidence": 0.69 + }, { "value": "3", "startOffset": 5846, @@ -5304,7 +5129,7 @@ "startOffset": 5969, "endOffset": 5979, "type": "Product", - "confidence": 0.95 + "confidence": 0.89 } ], "2.1.6.5": [ @@ -5447,13 +5272,6 @@ "type": "DateTime", "confidence": 0.99 }, - { - "value": "Cassia", - "startOffset": 419, - "endOffset": 425, - "type": "Product", - "confidence": 0.58 - }, { "value": "(75°C", "startOffset": 614, @@ -5461,13 +5279,6 @@ "type": "Quantity", "confidence": 1.0 }, - { - "value": "insoluble", - "startOffset": 622, - "endOffset": 631, - "type": "Skill", - "confidence": 0.53 - }, { "value": "ethanol", "startOffset": 635, @@ -5482,13 +5293,6 @@ "type": "Product", "confidence": 0.72 }, - { - "value": "solvents", - "startOffset": 683, - "endOffset": 691, - "type": "Product", - "confidence": 0.49 - }, { "value": "3.1", "startOffset": 729, @@ -5510,13 +5314,6 @@ "type": "Organization", "confidence": 0.98 }, - { - "value": "EINECS", - "startOffset": 806, - "endOffset": 812, - "type": "Organization", - "confidence": 0.58 - }, { "value": "IUPAC", "startOffset": 878, @@ -5566,13 +5363,6 @@ "type": "Skill", "confidence": 0.87 }, - { - "value": "Mannose", - "startOffset": 1275, - "endOffset": 1282, - "type": "Product", - "confidence": 0.58 - }, { "value": "polymer", "startOffset": 1334, @@ -5608,12 +5398,26 @@ "type": "Quantity", "confidence": 0.97 }, + { + "value": "1,6", + "startOffset": 1464, + "endOffset": 1467, + "type": "Quantity", + "confidence": 0.8 + }, + { + "value": "10-12%", + "startOffset": 1524, + "endOffset": 1530, + "type": "Quantity", + "confidence": 0.91 + }, { "value": "5%", "startOffset": 1579, "endOffset": 1581, "type": "Quantity", - "confidence": 0.99 + "confidence": 1.0 } ], "2.1.6.1": [ @@ -5631,13 +5435,6 @@ "type": "Product", "confidence": 0.98 }, - { - "value": "additive", - "startOffset": 74, - "endOffset": 82, - "type": "Product", - "confidence": 0.56 - }, { "value": "EU", "startOffset": 90, @@ -5701,13 +5498,6 @@ "type": "Skill", "confidence": 0.9 }, - { - "value": "gelling", - "startOffset": 359, - "endOffset": 366, - "type": "Skill", - "confidence": 0.57 - }, { "value": "3.6", "startOffset": 466, @@ -5876,13 +5666,6 @@ "type": "Organization", "confidence": 0.95 }, - { - "value": "food additive", - "startOffset": 1346, - "endOffset": 1359, - "type": "Product", - "confidence": 0.56 - }, { "value": "EFSA ANS Panel", "startOffset": 1373, @@ -6053,13 +5836,6 @@ "type": "Quantity", "confidence": 0.8 }, - { - "value": "–", - "startOffset": 443, - "endOffset": 444, - "type": "Quantity", - "confidence": 0.51 - }, { "value": "33", "startOffset": 444, diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.NER_ENTITIES.json b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.NER_ENTITIES.json index ab2ce425..640411f2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.NER_ENTITIES.json +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/ner_entities/intertek.NER_ENTITIES.json @@ -1 +1,247 @@ -{"dossierId": "2e41b84e-30ed-4098-b722-ed309a8a5bfb", "fileId": "caf8ba677d05df7a6625449e43c20baf", "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz", "X-TENANT-ID": "redaction", "data": {"2.1.1": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 73, "endOffset": 125, "type": "DEPARTMENT"}, {"value": "Head of Alliance Management", "startOffset": 194, "endOffset": 221, "type": "JOB_TITEL"}, {"value": "john.smith@smithcorp.com", "startOffset": 246, "endOffset": 270, "type": "MAIL"}], "2.1.2": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 79, "endOffset": 131, "type": "DEPARTMENT"}, {"value": "Head of Manufacture Tel", "startOffset": 199, "endOffset": 222, "type": "JOB_TITEL"}, {"value": "mimi.lang@smithcorp.com", "startOffset": 243, "endOffset": 266, "type": "MAIL"}], "2.1.3": [{"value": "+44 (0)1252 392460 Email:", "startOffset": 139, "endOffset": 164, "type": "PHONE"}, {"value": "United Kingdom", "startOffset": 338, "endOffset": 352, "type": "COUNTRY"}], "2.1.6.1": [{"value": "EU", "startOffset": 90, "endOffset": 92, "type": "ORG"}, {"value": "EU", "startOffset": 263, "endOffset": 265, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 1280, "endOffset": 1310, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1373, "endOffset": 1387, "type": "ORG"}], "2.1.6.4": [{"value": "Gidley", "startOffset": 66, "endOffset": 72, "type": "CBI_author"}, {"value": "Dentini", "startOffset": 149, "endOffset": 156, "type": "CBI_author"}, {"value": "Lang", "startOffset": 87, "endOffset": 91, "type": "CBI_author"}, {"value": "Kajiwara", "startOffset": 96, "endOffset": 104, "type": "CBI_author"}, {"value": "Kato", "startOffset": 112, "endOffset": 116, "type": "CBI_author"}, {"value": "Lang", "startOffset": 184, "endOffset": 188, "type": "CBI_author"}], "2.1.9": [{"value": "EFSA", "startOffset": 4101, "endOffset": 4105, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 4161, "endOffset": 4175, "type": "ORG"}, {"value": "Ames", "startOffset": 2392, "endOffset": 2396, "type": "NO_AUTHOR"}], "2.1.10.2": [{"value": "EFSA", "startOffset": 942, "endOffset": 946, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1002, "endOffset": 1016, "type": "ORG"}, {"value": "EFSA", "startOffset": 1101, "endOffset": 1105, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1161, "endOffset": 1175, "type": "ORG"}, {"value": "EFSA", "startOffset": 1788, "endOffset": 1792, "type": "ORG"}], "2.1.10.3": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "2.1.10.4": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}, {"value": "EFSA", "startOffset": 2618, "endOffset": 2622, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 2625, "endOffset": 2655, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 3893, "endOffset": 3923, "type": "ORG"}], "2.1.10.5": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "17": [{"value": "2", "startOffset": 18, "endOffset": 19, "type": "CARDINAL"}]}} \ No newline at end of file +{ + "dossierId": "83ca2709-e320-4162-acd2-9cee1f478c92", + "fileId": "21ca7bfb67f80db00d973d43155a0083", + "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", + "responseFileExtension": "NER_ENTITIES.json.gz", + "X-TENANT-ID": "redaction", + "data": { + "2.1.1": [ + { + "value": "Corporation Buckingham Palace Buckingham Palace Road", + "startOffset": 73, + "endOffset": 125, + "type": "DEPARTMENT" + }, + { + "value": "Head of Alliance Management", + "startOffset": 194, + "endOffset": 221, + "type": "JOB_TITEL" + }, + { + "value": "john.smith@smithcorp.com", + "startOffset": 246, + "endOffset": 270, + "type": "MAIL" + } + ], + "2.1.2": [ + { + "value": "Corporation Buckingham Palace Buckingham Palace Road", + "startOffset": 79, + "endOffset": 131, + "type": "DEPARTMENT" + }, + { + "value": "Head of Manufacture Tel", + "startOffset": 199, + "endOffset": 222, + "type": "JOB_TITEL" + }, + { + "value": "mimi.lang@smithcorp.com", + "startOffset": 243, + "endOffset": 266, + "type": "MAIL" + } + ], + "2.1.3": [ + { + "value": "+44 (0)1252 392460 Email:", + "startOffset": 139, + "endOffset": 164, + "type": "PHONE" + }, + { + "value": "United Kingdom", + "startOffset": 338, + "endOffset": 352, + "type": "COUNTRY" + } + ], + "2.1.6.1": [ + { + "value": "EU", + "startOffset": 90, + "endOffset": 92, + "type": "ORG" + }, + { + "value": "EU", + "startOffset": 263, + "endOffset": 265, + "type": "ORG" + }, + { + "value": "European Food Safety Authority", + "startOffset": 1280, + "endOffset": 1310, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 1373, + "endOffset": 1387, + "type": "ORG" + } + ], + "2.1.6.4": [ + { + "value": "Gidley", + "startOffset": 66, + "endOffset": 72, + "type": "CBI_author" + }, + { + "value": "Dentini", + "startOffset": 149, + "endOffset": 156, + "type": "CBI_author" + }, + { + "value": "Lang", + "startOffset": 87, + "endOffset": 91, + "type": "CBI_author" + }, + { + "value": "Kajiwara", + "startOffset": 96, + "endOffset": 104, + "type": "CBI_author" + }, + { + "value": "Kato", + "startOffset": 112, + "endOffset": 116, + "type": "CBI_author" + }, + { + "value": "Lang", + "startOffset": 184, + "endOffset": 188, + "type": "CBI_author" + } + ], + "2.1.9": [ + { + "value": "EFSA", + "startOffset": 4101, + "endOffset": 4105, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 4161, + "endOffset": 4175, + "type": "ORG" + }, + { + "value": "Ames", + "startOffset": 2392, + "endOffset": 2396, + "type": "NO_AUTHOR" + } + ], + "2.1.10.2": [ + { + "value": "EFSA", + "startOffset": 942, + "endOffset": 946, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 1002, + "endOffset": 1016, + "type": "ORG" + }, + { + "value": "EFSA", + "startOffset": 1101, + "endOffset": 1105, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 1161, + "endOffset": 1175, + "type": "ORG" + }, + { + "value": "EFSA", + "startOffset": 1788, + "endOffset": 1792, + "type": "ORG" + } + ], + "2.1.10.3": [ + { + "value": "EFSA", + "startOffset": 129, + "endOffset": 133, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 189, + "endOffset": 203, + "type": "ORG" + } + ], + "2.1.10.4": [ + { + "value": "EFSA", + "startOffset": 129, + "endOffset": 133, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 189, + "endOffset": 203, + "type": "ORG" + }, + { + "value": "EFSA", + "startOffset": 2618, + "endOffset": 2622, + "type": "ORG" + }, + { + "value": "European Food Safety Authority", + "startOffset": 2625, + "endOffset": 2655, + "type": "ORG" + }, + { + "value": "European Food Safety Authority", + "startOffset": 3893, + "endOffset": 3923, + "type": "ORG" + } + ], + "2.1.10.5": [ + { + "value": "EFSA", + "startOffset": 129, + "endOffset": 133, + "type": "ORG" + }, + { + "value": "EFSA ANS Panel", + "startOffset": 189, + "endOffset": 203, + "type": "ORG" + } + ], + "17": [ + { + "value": "2", + "startOffset": 18, + "endOffset": 19, + "type": "CARDINAL" + } + ] + } +} \ No newline at end of file