Merge branch 'AZURE_NER_FP' into 'master'

Update layout parser version for Azure NER service

See merge request redactmanager/redaction-service!497
This commit is contained in:
Maverick Studer 2024-08-27 10:13:58 +02:00
commit 004f6cb5f9
9 changed files with 499 additions and 424 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.141.0"
val layoutParserVersion = "0.160.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
@ -91,6 +91,7 @@ dependencies {
module = "persistence-service-shared-api-v1"
)
}
testImplementation("com.pdftron:PDFNet:10.11.0")
}
dependencyManagement {

View File

@ -75,6 +75,7 @@ import com.mongodb.MongoCommandException;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoDatabase;
import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@ -228,6 +229,8 @@ public abstract class AbstractRedactionIntegrationTest {
when(tenantProvider.getTenant(any())).thenReturn(builder().tenantId("redaction").mongoDBConnection(mongoDBConnection).build());
tenantMongoLiquibaseExecutor.initializeTenant("redaction");
PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
}

View File

@ -26,7 +26,7 @@ import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichme
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ -89,17 +89,11 @@ public class TableTest extends BuildDocumentIntegrationTest {
storageService.downloadTo(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT),
file);
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA);
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
viewerDocumentService.addVisualizationsOnPage(file,
file,
List.of(Visualizations.builder()
.layer(ENTITY_LAYER)
.visualizationsOnPages(visualizationsOnPage)
.layerVisibilityDefaultValue(true)
.build()));
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
}
}

View File

@ -7,10 +7,12 @@ import java.awt.Color;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -176,36 +178,42 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
/**
 * Gathers the NER entities used by this test into a single list: entities streamed by type from
 * {@code nerEntities}, plus {@code CBI_address} entities built from the boundaries returned by
 * {@code NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults} and
 * {@code NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence}.
 *
 * NOTE(review): this span comes from a diff hunk and appears to contain both the removed
 * stream-based lines and the added list-based lines without +/- markers — confirm against the
 * actual file before relying on statement order.
 *
 * @param nerEntities NER result whose entities are streamed by type and combined into addresses
 * @param document document whose text block supplies the text for every combined boundary
 * @return the collected entities
 */
private List<NerEntities.NerEntity> validateAndCombine(NerEntities nerEntities, Document document) {
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author")
.toList();
// Each boundary returned by the adapter is turned into a CBI_address entity using the matching text of the document.
Stream<NerEntities.NerEntity> cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
// NOTE(review): the meaning of the two type sets and of the numeric arguments (50, 3, 2, 0.7) is defined in NerEntitiesAdapter and is not visible here — TODO confirm.
Stream<NerEntities.NerEntity> azureCbiAddress = NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities,
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"CITY"),
Set.of("Organization",
"Location",
"Address",
"Quantity",
"ORG",
"STREET",
"POSTAL",
"COUNTRY",
"CARDINAL",
"CITY",
"STATE"),
50,
3,
2,
0.7)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
List<NerEntities.NerEntity> combinedEntities = new ArrayList<>();
return Stream.concat(cbiAuthors.stream(), Stream.concat(cbiAddress, azureCbiAddress))
.toList();
// Entities that are taken over unchanged, selected by their type label.
combinedEntities.addAll(nerEntities.streamEntitiesOfType("CBI_author")
.toList());
combinedEntities.addAll(nerEntities.streamEntitiesOfType("Person")
.toList());
combinedEntities.addAll(nerEntities.streamEntitiesOfType("Address")
.toList());
combinedEntities.addAll(nerEntities.streamEntitiesOfType("Location")
.toList());
// Combined address boundaries are appended as CBI_address entities.
NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"))
.forEach(combinedEntities::add);
NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities,
Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
Set.of("Organization",
"Location",
"Address",
"Quantity",
"ORG",
"STREET",
"POSTAL",
"COUNTRY",
"CARDINAL",
"CITY",
"STATE"),
50,
3,
2,
0.7)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"))
.forEach(combinedEntities::add);
return combinedEntities;
}

View File

@ -0,0 +1,38 @@
package com.iqser.red.service.redaction.v1.server.utils;
import java.util.List;
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import lombok.RequiredArgsConstructor;
/**
 * {@link LayerGroup} implementation that exposes a fixed list of {@link Visualizations} under the
 * {@link #ENTITY_LAYER} group identifier.
 */
public class EntityLayerGroup implements LayerGroup {

    /** Identifier returned by {@link #getGroupIdentifier()}. */
    public static final LayerIdentifier ENTITY_LAYER = new LayerIdentifier("Entities", "ENTITIES");

    private final List<Visualizations> visualizations;


    /**
     * Creates a group around the given visualizations.
     *
     * @param visualizations the list handed back unchanged by {@link #getVisualizations()}
     */
    public EntityLayerGroup(List<Visualizations> visualizations) {

        this.visualizations = visualizations;
    }


    /**
     * @return always {@code false} for this group
     */
    @Override
    public boolean subLayersAreOptionalContent() {

        return false;
    }


    /**
     * @return the constant {@link #ENTITY_LAYER}
     */
    @Override
    public LayerIdentifier getGroupIdentifier() {

        return ENTITY_LAYER;
    }


    /**
     * @return the list passed to the constructor
     */
    @Override
    public List<Visualizations> getVisualizations() {

        return this.visualizations;
    }

}

View File

@ -8,13 +8,12 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.pdfbox.cos.COSName;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
import lombok.experimental.UtilityClass;
@ -22,7 +21,7 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class EntityVisualizationUtility {
public static final ContentStreams.Identifier ENTITY_LAYER = new ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true);
public static final LayerIdentifier ENTITY_LAYER = new LayerIdentifier("Entities", "KNECON_ENTITIES");
public Map<Integer, VisualizationsOnPage> createVisualizationsOnPage(Collection<TextEntity> entity, Color color) {
@ -58,4 +57,13 @@ public class EntityVisualizationUtility {
.toList();
}
/**
 * Wraps the visualizations of the given entities into an {@link EntityLayerGroup} that holds
 * exactly one {@link Visualizations} element on {@code EntityLayerGroup.ENTITY_LAYER}.
 *
 * @param entity entities handed to {@code createVisualizationsOnPage}
 * @param color color handed to {@code createVisualizationsOnPage}
 * @return a new layer group containing the single built visualization layer
 */
public EntityLayerGroup createEntityLayerGroup(Collection<TextEntity> entity, Color color) {

    Map<Integer, VisualizationsOnPage> pageVisualizations = createVisualizationsOnPage(entity, color);
    Visualizations entityVisualizations = Visualizations.builder()
            .layer(EntityLayerGroup.ENTITY_LAYER)
            .visualizationsOnPages(pageVisualizations)
            .build();
    List<Visualizations> singleLayer = List.of(entityVisualizations);
    return new EntityLayerGroup(singleLayer);
}
}

View File

@ -37,6 +37,7 @@ public class LayoutParsingRequestProvider {
.simplifiedTextStorageId(simplifiedTextStorageId)
.viewerDocumentStorageId(viewerDocumentStorageId)
.visualLayoutParsingFileId(Optional.empty())
.documentMarkdownFileStorageId(Optional.empty())
.build();
}

View File

@ -29,13 +29,6 @@
"type": "PersonType",
"confidence": 0.85
},
{
"value": "manufacturers",
"startOffset": 163,
"endOffset": 176,
"type": "PersonType",
"confidence": 0.5
},
{
"value": "person",
"startOffset": 232,
@ -57,13 +50,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "to 36",
"startOffset": 422,
"endOffset": 427,
"type": "Quantity",
"confidence": 0.52
},
{
"value": "Manufacturing process",
"startOffset": 428,
@ -400,13 +386,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Sales data",
"startOffset": 1478,
"endOffset": 1488,
"type": "Skill",
"confidence": 0.58
},
{
"value": "sales",
"startOffset": 1498,
@ -428,13 +407,6 @@
"type": "PersonType",
"confidence": 0.7
},
{
"value": "internal study",
"startOffset": 1673,
"endOffset": 1687,
"type": "Skill",
"confidence": 0.5
},
{
"value": "reports",
"startOffset": 1779,
@ -456,6 +428,13 @@
"type": "PersonType",
"confidence": 0.95
},
{
"value": "applicants",
"startOffset": 1964,
"endOffset": 1974,
"type": "PersonType",
"confidence": 0.88
},
{
"value": "data",
"startOffset": 2001,
@ -535,13 +514,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "applicants",
"startOffset": 392,
"endOffset": 402,
"type": "PersonType",
"confidence": 0.52
},
{
"value": "EFSA",
"startOffset": 433,
@ -549,13 +521,6 @@
"type": "Organization",
"confidence": 0.97
},
{
"value": "additives",
"startOffset": 477,
"endOffset": 486,
"type": "Product",
"confidence": 0.5
},
{
"value": "2.2.9.5",
"startOffset": 628,
@ -1039,6 +1004,13 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Analytical",
"startOffset": 2220,
"endOffset": 2230,
"type": "Skill",
"confidence": 0.87
},
{
"value": "3.3",
"startOffset": 2289,
@ -1074,6 +1046,41 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Compliance",
"startOffset": 2345,
"endOffset": 2355,
"type": "Skill",
"confidence": 0.64
},
{
"value": "4.9",
"startOffset": 2412,
"endOffset": 2415,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "2",
"startOffset": 2338,
"endOffset": 2339,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "3",
"startOffset": 2340,
"endOffset": 2341,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "10",
"startOffset": 2342,
"endOffset": 2344,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "4.9",
"startOffset": 2412,
@ -1158,20 +1165,6 @@
"type": "Organization",
"confidence": 0.88
},
{
"value": "Food Additives",
"startOffset": 2698,
"endOffset": 2712,
"type": "Product",
"confidence": 0.56
},
{
"value": "Commission",
"startOffset": 2738,
"endOffset": 2748,
"type": "Organization",
"confidence": 0.5
},
{
"value": "234/2011",
"startOffset": 2768,
@ -1305,13 +1298,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "",
"startOffset": 3278,
"endOffset": 3279,
"type": "Quantity",
"confidence": 0.51
},
{
"value": "33",
"startOffset": 3279,
@ -1540,13 +1526,6 @@
"type": "Product",
"confidence": 0.62
},
{
"value": "food",
"startOffset": 505,
"endOffset": 509,
"type": "Product",
"confidence": 0.51
},
{
"value": "food additive",
"startOffset": 602,
@ -1652,13 +1631,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "",
"startOffset": 971,
"endOffset": 972,
"type": "Quantity",
"confidence": 0.51
},
{
"value": "33",
"startOffset": 972,
@ -1819,13 +1791,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Technical Dossier",
"startOffset": 461,
"endOffset": 478,
"type": "Skill",
"confidence": 0.51
},
{
"value": "1331/20083",
"startOffset": 593,
@ -1840,13 +1805,6 @@
"type": "Skill",
"confidence": 0.76
},
{
"value": "assessment",
"startOffset": 657,
"endOffset": 667,
"type": "Skill",
"confidence": 0.58
},
{
"value": "Data",
"startOffset": 705,
@ -1854,13 +1812,6 @@
"type": "Skill",
"confidence": 0.66
},
{
"value": "application",
"startOffset": 727,
"endOffset": 738,
"type": "Skill",
"confidence": 0.59
},
{
"value": "234/20114",
"startOffset": 796,
@ -1882,13 +1833,6 @@
"type": "Skill",
"confidence": 0.96
},
{
"value": "food",
"startOffset": 906,
"endOffset": 910,
"type": "Product",
"confidence": 0.5
},
{
"value": "EFSA",
"startOffset": 942,
@ -2001,13 +1945,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "applicants",
"startOffset": 1602,
"endOffset": 1612,
"type": "PersonType",
"confidence": 0.59
},
{
"value": "EFSA",
"startOffset": 1788,
@ -2176,13 +2113,6 @@
"type": "Product",
"confidence": 0.62
},
{
"value": "food flavourings",
"startOffset": 2340,
"endOffset": 2356,
"type": "Product",
"confidence": 0.54
},
{
"value": "64",
"startOffset": 2363,
@ -2204,13 +2134,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "24.",
"startOffset": 2384,
"endOffset": 2387,
"type": "Quantity",
"confidence": 0.55
},
{
"value": "http://eur-lex.europa.eu/legalcontent/EN/ALL/?uri=CELEX:32011R0234",
"startOffset": 2406,
@ -2276,13 +2199,6 @@
"type": "Product",
"confidence": 0.98
},
{
"value": "food additive",
"startOffset": 95,
"endOffset": 108,
"type": "Product",
"confidence": 0.53
},
{
"value": "European Union",
"startOffset": 116,
@ -2320,13 +2236,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Application",
"startOffset": 49,
"endOffset": 60,
"type": "Skill",
"confidence": 0.54
},
{
"value": "support",
"startOffset": 95,
@ -2394,13 +2303,6 @@
"type": "Organization",
"confidence": 0.91
},
{
"value": "ANS Panel",
"startOffset": 194,
"endOffset": 203,
"type": "Organization",
"confidence": 0.58
},
{
"value": "2021",
"startOffset": 205,
@ -2434,14 +2336,21 @@
"startOffset": 472,
"endOffset": 486,
"type": "Product",
"confidence": 0.6
"confidence": 0.89
},
{
"value": "enzymes",
"startOffset": 493,
"value": "food enzymes",
"startOffset": 488,
"endOffset": 500,
"type": "Product",
"confidence": 0.49
"confidence": 0.8
},
{
"value": "food flavourings",
"startOffset": 505,
"endOffset": 521,
"type": "Product",
"confidence": 0.68
},
{
"value": "1",
@ -2499,13 +2408,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "applicant",
"startOffset": 799,
"endOffset": 808,
"type": "PersonType",
"confidence": 0.53
},
{
"value": "1",
"startOffset": 809,
@ -2534,13 +2436,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "manufacture",
"startOffset": 860,
"endOffset": 871,
"type": "Skill",
"confidence": 0.89
},
{
"value": "1",
"startOffset": 872,
@ -2632,19 +2527,19 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "application",
"startOffset": 1018,
"endOffset": 1029,
"type": "Skill",
"confidence": 0.76
},
{
"value": "food additive",
"startOffset": 1051,
"endOffset": 1064,
"type": "Product",
"confidence": 0.78
},
{
"value": "modification",
"startOffset": 1066,
"endOffset": 1078,
"type": "Event",
"confidence": 0.54
"confidence": 0.85
},
{
"value": "1",
@ -2961,13 +2856,6 @@
"type": "Skill",
"confidence": 0.68
},
{
"value": "characteristics",
"startOffset": 2281,
"endOffset": 2296,
"type": "Skill",
"confidence": 0.5
},
{
"value": "2",
"startOffset": 2297,
@ -3192,13 +3080,6 @@
"type": "IPAddress",
"confidence": 0.8
},
{
"value": "Toxicokinetics",
"startOffset": 2974,
"endOffset": 2988,
"type": "Skill",
"confidence": 0.57
},
{
"value": "3.9",
"startOffset": 2989,
@ -3724,13 +3605,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Specifications",
"startOffset": 934,
"endOffset": 948,
"type": "Skill",
"confidence": 0.55
},
{
"value": "Batch Analysis",
"startOffset": 953,
@ -3801,13 +3675,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Contaminants",
"startOffset": 1115,
"endOffset": 1127,
"type": "Skill",
"confidence": 0.53
},
{
"value": "3",
"startOffset": 1128,
@ -4382,13 +4249,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Analytical",
"startOffset": 3117,
"endOffset": 3127,
"type": "Skill",
"confidence": 0.56
},
{
"value": "4.9",
"startOffset": 3223,
@ -4480,13 +4340,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cagate",
"startOffset": 3626,
"endOffset": 3632,
"type": "Skill",
"confidence": 0.5
},
{
"value": "2",
"startOffset": 3641,
@ -4613,13 +4466,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "EU",
"startOffset": 4044,
"endOffset": 4046,
"type": "Organization",
"confidence": 0.49
},
{
"value": "234/2011",
"startOffset": 4051,
@ -4646,21 +4492,14 @@
"startOffset": 4161,
"endOffset": 4165,
"type": "Organization",
"confidence": 0.93
},
{
"value": "ANS",
"startOffset": 4166,
"endOffset": 4169,
"type": "Organization",
"confidence": 0.47
"confidence": 0.95
},
{
"value": "2021",
"startOffset": 4177,
"endOffset": 4181,
"type": "DateTime",
"confidence": 0.97
"confidence": 0.98
},
{
"value": "1",
@ -4732,6 +4571,48 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "2",
"startOffset": 4295,
"endOffset": 4296,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "2",
"startOffset": 4297,
"endOffset": 4298,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "1",
"startOffset": 4299,
"endOffset": 4300,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "1",
"startOffset": 4301,
"endOffset": 4302,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "2.2.2.1",
"startOffset": 4347,
"endOffset": 4354,
"type": "IPAddress",
"confidence": 0.8
},
{
"value": "1",
"startOffset": 4355,
"endOffset": 4356,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "2.2.2.1",
"startOffset": 4403,
@ -4746,13 +4627,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia Gum",
"startOffset": 4427,
"endOffset": 4437,
"type": "Product",
"confidence": 0.73
},
{
"value": "2",
"startOffset": 4459,
@ -4781,13 +4655,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia Gum",
"startOffset": 4486,
"endOffset": 4496,
"type": "Product",
"confidence": 0.96
},
{
"value": "2",
"startOffset": 4503,
@ -4816,13 +4683,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia Gum",
"startOffset": 4530,
"endOffset": 4540,
"type": "Product",
"confidence": 0.86
},
{
"value": "3",
"startOffset": 4547,
@ -4852,11 +4712,11 @@
"confidence": 0.8
},
{
"value": "Batch Analysis",
"startOffset": 4555,
"value": "Analysis",
"startOffset": 4561,
"endOffset": 4569,
"type": "Skill",
"confidence": 0.94
"confidence": 0.69
},
{
"value": "16",
@ -4865,13 +4725,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia",
"startOffset": 4588,
"endOffset": 4594,
"type": "Product",
"confidence": 0.63
},
{
"value": "3.1.2.1",
"startOffset": 4605,
@ -4887,11 +4740,11 @@
"confidence": 0.8
},
{
"value": "Heavy Metal Analysis",
"value": "Heavy Metal",
"startOffset": 4615,
"endOffset": 4635,
"endOffset": 4626,
"type": "Skill",
"confidence": 0.89
"confidence": 0.64
},
{
"value": "21",
@ -4900,13 +4753,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia",
"startOffset": 4654,
"endOffset": 4660,
"type": "Product",
"confidence": 0.74
},
{
"value": "3.1.2.2",
"startOffset": 4671,
@ -4921,20 +4767,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Analysis",
"startOffset": 4681,
"endOffset": 4689,
"type": "Skill",
"confidence": 0.6
},
{
"value": "Residual",
"startOffset": 4694,
"endOffset": 4702,
"type": "Skill",
"confidence": 0.52
},
{
"value": "16",
"startOffset": 4715,
@ -4956,13 +4788,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Microbial Analysis",
"startOffset": 4756,
"endOffset": 4774,
"type": "Skill",
"confidence": 0.97
},
{
"value": "16",
"startOffset": 4779,
@ -5124,20 +4949,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": ".8%",
"startOffset": 5189,
"endOffset": 5192,
"type": "Quantity",
"confidence": 0.89
},
{
"value": "168",
"startOffset": 5209,
"endOffset": 5212,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "3",
"startOffset": 5239,
@ -5194,13 +5005,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "European Union",
"startOffset": 5464,
"endOffset": 5478,
"type": "Organization",
"confidence": 0.69
},
{
"value": "3",
"startOffset": 5485,
@ -5229,6 +5033,13 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "FoodEx2",
"startOffset": 5632,
"endOffset": 5639,
"type": "Organization",
"confidence": 0.81
},
{
"value": "3.9",
"startOffset": 5673,
@ -5271,6 +5082,20 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "Cassia Gum",
"startOffset": 5807,
"endOffset": 5817,
"type": "Product",
"confidence": 0.94
},
{
"value": "European Union",
"startOffset": 5825,
"endOffset": 5839,
"type": "Organization",
"confidence": 0.69
},
{
"value": "3",
"startOffset": 5846,
@ -5304,7 +5129,7 @@
"startOffset": 5969,
"endOffset": 5979,
"type": "Product",
"confidence": 0.95
"confidence": 0.89
}
],
"2.1.6.5": [
@ -5447,13 +5272,6 @@
"type": "DateTime",
"confidence": 0.99
},
{
"value": "Cassia",
"startOffset": 419,
"endOffset": 425,
"type": "Product",
"confidence": 0.58
},
{
"value": "(75°C",
"startOffset": 614,
@ -5461,13 +5279,6 @@
"type": "Quantity",
"confidence": 1.0
},
{
"value": "insoluble",
"startOffset": 622,
"endOffset": 631,
"type": "Skill",
"confidence": 0.53
},
{
"value": "ethanol",
"startOffset": 635,
@ -5482,13 +5293,6 @@
"type": "Product",
"confidence": 0.72
},
{
"value": "solvents",
"startOffset": 683,
"endOffset": 691,
"type": "Product",
"confidence": 0.49
},
{
"value": "3.1",
"startOffset": 729,
@ -5510,13 +5314,6 @@
"type": "Organization",
"confidence": 0.98
},
{
"value": "EINECS",
"startOffset": 806,
"endOffset": 812,
"type": "Organization",
"confidence": 0.58
},
{
"value": "IUPAC",
"startOffset": 878,
@ -5566,13 +5363,6 @@
"type": "Skill",
"confidence": 0.87
},
{
"value": "Mannose",
"startOffset": 1275,
"endOffset": 1282,
"type": "Product",
"confidence": 0.58
},
{
"value": "polymer",
"startOffset": 1334,
@ -5608,12 +5398,26 @@
"type": "Quantity",
"confidence": 0.97
},
{
"value": "1,6",
"startOffset": 1464,
"endOffset": 1467,
"type": "Quantity",
"confidence": 0.8
},
{
"value": "10-12%",
"startOffset": 1524,
"endOffset": 1530,
"type": "Quantity",
"confidence": 0.91
},
{
"value": "5%",
"startOffset": 1579,
"endOffset": 1581,
"type": "Quantity",
"confidence": 0.99
"confidence": 1.0
}
],
"2.1.6.1": [
@ -5631,13 +5435,6 @@
"type": "Product",
"confidence": 0.98
},
{
"value": "additive",
"startOffset": 74,
"endOffset": 82,
"type": "Product",
"confidence": 0.56
},
{
"value": "EU",
"startOffset": 90,
@ -5701,13 +5498,6 @@
"type": "Skill",
"confidence": 0.9
},
{
"value": "gelling",
"startOffset": 359,
"endOffset": 366,
"type": "Skill",
"confidence": 0.57
},
{
"value": "3.6",
"startOffset": 466,
@ -5876,13 +5666,6 @@
"type": "Organization",
"confidence": 0.95
},
{
"value": "food additive",
"startOffset": 1346,
"endOffset": 1359,
"type": "Product",
"confidence": 0.56
},
{
"value": "EFSA ANS Panel",
"startOffset": 1373,
@ -6053,13 +5836,6 @@
"type": "Quantity",
"confidence": 0.8
},
{
"value": "",
"startOffset": 443,
"endOffset": 444,
"type": "Quantity",
"confidence": 0.51
},
{
"value": "33",
"startOffset": 444,

View File

@ -1 +1,247 @@
{"dossierId": "2e41b84e-30ed-4098-b722-ed309a8a5bfb", "fileId": "caf8ba677d05df7a6625449e43c20baf", "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz", "X-TENANT-ID": "redaction", "data": {"2.1.1": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 73, "endOffset": 125, "type": "DEPARTMENT"}, {"value": "Head of Alliance Management", "startOffset": 194, "endOffset": 221, "type": "JOB_TITEL"}, {"value": "john.smith@smithcorp.com", "startOffset": 246, "endOffset": 270, "type": "MAIL"}], "2.1.2": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 79, "endOffset": 131, "type": "DEPARTMENT"}, {"value": "Head of Manufacture Tel", "startOffset": 199, "endOffset": 222, "type": "JOB_TITEL"}, {"value": "mimi.lang@smithcorp.com", "startOffset": 243, "endOffset": 266, "type": "MAIL"}], "2.1.3": [{"value": "+44 (0)1252 392460 Email:", "startOffset": 139, "endOffset": 164, "type": "PHONE"}, {"value": "United Kingdom", "startOffset": 338, "endOffset": 352, "type": "COUNTRY"}], "2.1.6.1": [{"value": "EU", "startOffset": 90, "endOffset": 92, "type": "ORG"}, {"value": "EU", "startOffset": 263, "endOffset": 265, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 1280, "endOffset": 1310, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1373, "endOffset": 1387, "type": "ORG"}], "2.1.6.4": [{"value": "Gidley", "startOffset": 66, "endOffset": 72, "type": "CBI_author"}, {"value": "Dentini", "startOffset": 149, "endOffset": 156, "type": "CBI_author"}, {"value": "Lang", "startOffset": 87, "endOffset": 91, "type": "CBI_author"}, {"value": "Kajiwara", "startOffset": 96, "endOffset": 104, "type": "CBI_author"}, {"value": "Kato", "startOffset": 112, "endOffset": 116, "type": "CBI_author"}, {"value": "Lang", "startOffset": 184, "endOffset": 188, "type": "CBI_author"}], "2.1.9": [{"value": "EFSA", "startOffset": 4101, "endOffset": 4105, "type": "ORG"}, 
{"value": "EFSA ANS Panel", "startOffset": 4161, "endOffset": 4175, "type": "ORG"}, {"value": "Ames", "startOffset": 2392, "endOffset": 2396, "type": "NO_AUTHOR"}], "2.1.10.2": [{"value": "EFSA", "startOffset": 942, "endOffset": 946, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1002, "endOffset": 1016, "type": "ORG"}, {"value": "EFSA", "startOffset": 1101, "endOffset": 1105, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1161, "endOffset": 1175, "type": "ORG"}, {"value": "EFSA", "startOffset": 1788, "endOffset": 1792, "type": "ORG"}], "2.1.10.3": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "2.1.10.4": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}, {"value": "EFSA", "startOffset": 2618, "endOffset": 2622, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 2625, "endOffset": 2655, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 3893, "endOffset": 3923, "type": "ORG"}], "2.1.10.5": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "17": [{"value": "2", "startOffset": 18, "endOffset": 19, "type": "CARDINAL"}]}}
{
"dossierId": "83ca2709-e320-4162-acd2-9cee1f478c92",
"fileId": "21ca7bfb67f80db00d973d43155a0083",
"targetFileExtension": "SIMPLIFIED_TEXT.json.gz",
"responseFileExtension": "NER_ENTITIES.json.gz",
"X-TENANT-ID": "redaction",
"data": {
"2.1.1": [
{
"value": "Corporation Buckingham Palace Buckingham Palace Road",
"startOffset": 73,
"endOffset": 125,
"type": "DEPARTMENT"
},
{
"value": "Head of Alliance Management",
"startOffset": 194,
"endOffset": 221,
"type": "JOB_TITEL"
},
{
"value": "john.smith@smithcorp.com",
"startOffset": 246,
"endOffset": 270,
"type": "MAIL"
}
],
"2.1.2": [
{
"value": "Corporation Buckingham Palace Buckingham Palace Road",
"startOffset": 79,
"endOffset": 131,
"type": "DEPARTMENT"
},
{
"value": "Head of Manufacture Tel",
"startOffset": 199,
"endOffset": 222,
"type": "JOB_TITEL"
},
{
"value": "mimi.lang@smithcorp.com",
"startOffset": 243,
"endOffset": 266,
"type": "MAIL"
}
],
"2.1.3": [
{
"value": "+44 (0)1252 392460 Email:",
"startOffset": 139,
"endOffset": 164,
"type": "PHONE"
},
{
"value": "United Kingdom",
"startOffset": 338,
"endOffset": 352,
"type": "COUNTRY"
}
],
"2.1.6.1": [
{
"value": "EU",
"startOffset": 90,
"endOffset": 92,
"type": "ORG"
},
{
"value": "EU",
"startOffset": 263,
"endOffset": 265,
"type": "ORG"
},
{
"value": "European Food Safety Authority",
"startOffset": 1280,
"endOffset": 1310,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 1373,
"endOffset": 1387,
"type": "ORG"
}
],
"2.1.6.4": [
{
"value": "Gidley",
"startOffset": 66,
"endOffset": 72,
"type": "CBI_author"
},
{
"value": "Dentini",
"startOffset": 149,
"endOffset": 156,
"type": "CBI_author"
},
{
"value": "Lang",
"startOffset": 87,
"endOffset": 91,
"type": "CBI_author"
},
{
"value": "Kajiwara",
"startOffset": 96,
"endOffset": 104,
"type": "CBI_author"
},
{
"value": "Kato",
"startOffset": 112,
"endOffset": 116,
"type": "CBI_author"
},
{
"value": "Lang",
"startOffset": 184,
"endOffset": 188,
"type": "CBI_author"
}
],
"2.1.9": [
{
"value": "EFSA",
"startOffset": 4101,
"endOffset": 4105,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 4161,
"endOffset": 4175,
"type": "ORG"
},
{
"value": "Ames",
"startOffset": 2392,
"endOffset": 2396,
"type": "NO_AUTHOR"
}
],
"2.1.10.2": [
{
"value": "EFSA",
"startOffset": 942,
"endOffset": 946,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 1002,
"endOffset": 1016,
"type": "ORG"
},
{
"value": "EFSA",
"startOffset": 1101,
"endOffset": 1105,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 1161,
"endOffset": 1175,
"type": "ORG"
},
{
"value": "EFSA",
"startOffset": 1788,
"endOffset": 1792,
"type": "ORG"
}
],
"2.1.10.3": [
{
"value": "EFSA",
"startOffset": 129,
"endOffset": 133,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 189,
"endOffset": 203,
"type": "ORG"
}
],
"2.1.10.4": [
{
"value": "EFSA",
"startOffset": 129,
"endOffset": 133,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 189,
"endOffset": 203,
"type": "ORG"
},
{
"value": "EFSA",
"startOffset": 2618,
"endOffset": 2622,
"type": "ORG"
},
{
"value": "European Food Safety Authority",
"startOffset": 2625,
"endOffset": 2655,
"type": "ORG"
},
{
"value": "European Food Safety Authority",
"startOffset": 3893,
"endOffset": 3923,
"type": "ORG"
}
],
"2.1.10.5": [
{
"value": "EFSA",
"startOffset": 129,
"endOffset": 133,
"type": "ORG"
},
{
"value": "EFSA ANS Panel",
"startOffset": 189,
"endOffset": 203,
"type": "ORG"
}
],
"17": [
{
"value": "2",
"startOffset": 18,
"endOffset": 19,
"type": "CARDINAL"
}
]
}
}