RED-9374: NER entities are at wrong locations
This commit is contained in:
parent
e0fb825cf7
commit
d116d99db7
@ -14,7 +14,7 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class MatchedSection {
|
||||
|
||||
private int sectionNumber;
|
||||
private String sectionNumber;
|
||||
private String headline;
|
||||
|
||||
@Builder.Default
|
||||
|
||||
@ -16,7 +16,7 @@ import lombok.NoArgsConstructor;
|
||||
@SuppressWarnings("serial")
|
||||
public class IndexSection implements Serializable {
|
||||
|
||||
private int sectionNumber;
|
||||
private String sectionNumber;
|
||||
private String text;
|
||||
private Set<Integer> pages;
|
||||
private String headline;
|
||||
|
||||
@ -14,7 +14,7 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class SectionText {
|
||||
|
||||
private int sectionNumber;
|
||||
private String sectionNumber;
|
||||
private String headline;
|
||||
private String text;
|
||||
|
||||
|
||||
@ -310,7 +310,7 @@ public class SearchServiceImpl implements SearchService {
|
||||
|
||||
return MatchedSection.builder()
|
||||
.headline(indexSection.get("headline") != null ? indexSection.getString("headline") : null)
|
||||
.sectionNumber(indexSection.getInt("sectionNumber"))
|
||||
.sectionNumber(indexSection.getString("sectionNumber"))
|
||||
.pages(pages)
|
||||
.matchedTerms(hit.matchedQueries().stream().collect(Collectors.toSet()))
|
||||
.build();
|
||||
|
||||
@ -330,7 +330,7 @@ public class SearchServiceImpl implements SearchService {
|
||||
|
||||
return MatchedSection.builder()
|
||||
.headline(indexSection.get("headline") != null ? indexSection.getString("headline") : null)
|
||||
.sectionNumber(indexSection.getInt("sectionNumber"))
|
||||
.sectionNumber(indexSection.getString("sectionNumber"))
|
||||
.pages(pages)
|
||||
.matchedTerms(hit.matchedQueries().stream().collect(Collectors.toSet()))
|
||||
.build();
|
||||
|
||||
@ -2,204 +2,204 @@
|
||||
"numberOfPages": 9,
|
||||
"sectionTexts": [
|
||||
{
|
||||
"sectionNumber": 1,
|
||||
"sectionNumber": "1",
|
||||
"text": "Rule 0: Expand CBI Authors with firstname initials F. Lastname, J. Doe, M. Mustermann Lastname M., Doe J. Mustermann M."
|
||||
},
|
||||
{
|
||||
"sectionNumber": 2,
|
||||
"sectionNumber": "2",
|
||||
"text": "Rule 1/2: Redact CBI Authors based on Dict Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No David Ksenia Max Mustermann Ranya Eikenboom Charalampos Schenk Tanja Schmitt ← should not be annotated, not in Dictionary"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 3,
|
||||
"sectionNumber": "3",
|
||||
"text": "Rule 3/4: Redact (not) CBI Add/ress based on Dict Dont Redact (mark as skipped) when Vertebrate Study is No Redact when Vertebrate Study is Yes Warnsveld, 7232 CX Warnsveld, Netherlands, NL Institut Industries, 33 Rue Jean Baffier, 18000 Bourges, France, FR 4-6 Chem. des Varennes, 18300 Saint-Satur, France, FR Lesdo Industries, Chäppelisträssli, 6078 Lungern, Switzerland Shlissel'burgskaya Ulitsa, Nizhny Novgorod Oblast, Russia, 603034, RU Karl Johans Gate 11, 0154 Oslo, Norway, NOR ← should not be annotated, not in Dictionary"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 4,
|
||||
"sectionNumber": "4",
|
||||
"text": "Rule 5: Do not redact genitive CBI_authors (Entries based on Dict) Expand to Hint Clarissa’s Donut ← not added to Dict, should be not annotated Simpson's Tower ← added to Authors-Dict, should be annotated"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 5,
|
||||
"sectionNumber": "5",
|
||||
"text": "Reference No Author(s) Year Title Laboratory"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 6,
|
||||
"sectionNumber": "6",
|
||||
"text": "BR2 /2 Michael N. 1998 The role of physical education in the school system. Weyland Industries"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 7,
|
||||
"sectionNumber": "7",
|
||||
"text": "BR3 /5 Funnarie B. 2001 It should be illegal to produce and sell tobacco Authentic Diagnostics"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 8,
|
||||
"sectionNumber": "8",
|
||||
"text": "ZZ/12 Feuer A. 1989 Social media is the real cause of teenage depression. Tyrell Corporation"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 10,
|
||||
"sectionNumber": "10",
|
||||
"text": "Rule 6-11 (Authors Table) Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 11,
|
||||
"sectionNumber": "11",
|
||||
"text": "Rule 12/13: Redact/Hint if CTL or BL was found Redact when Vertebrate Study is Yes Hint when Vertebrate Study is No CTL/without dictionary entry CTL without Slash BL/without dictionary entry BL without Slash CTL/with dictionary entry 1234 with Slash CTL with dictionary entry 5678 without Slash BL/with dictionary entry 1234 with Slash BL with dictionary entry 5678 without Slash"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 12,
|
||||
"sectionNumber": "12",
|
||||
"text": "Rule 14/15: Redact and add recommendation for et al. Redact Term “Desiree”, “Melanie” and add to Recommendation CBI Authors if Vertebrate Study is Yes & No Lorem ipsum dolor sit amet, consectetur adipiscing elit Desiree et al sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Melanie et al. Reference No 12345 Lorem ipsum."
|
||||
},
|
||||
{
|
||||
"sectionNumber": 13,
|
||||
"sectionNumber": "13",
|
||||
"text": "Rule 16/17: Add recommendation for Addresses in Test Organism/Animals sections Recommend only if Vertebrate Study is Yes, else do nothing Lorem ipsum dolor sit Species: Mouse; Source: Stark Industries"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 14,
|
||||
"sectionNumber": "14",
|
||||
"text": "Rule 16/17 (additional) negative Test; missing first Key Nothing should happen because of missing first/second keyword according to the rules Dont redact here because of missing first key; Source: Omni Consumer Products Dont redact here because missing first keyword; Source Resources Development Administration"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 15,
|
||||
"sectionNumber": "15",
|
||||
"text": "Rule 16/17 (additional) negative Test; missing second Key Dont redact here because of missing second key; Species: Mouse; Omni Consumer Products Dont redact here because missing second keyword; Species: Mouse, Resources Development Administration"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 16,
|
||||
"sectionNumber": "16",
|
||||
"text": "Rule 18: Do not redact Names and Addresses if Published Information found Do not redact Names and Addresses if Published Information found Lorem ipsum dolor sit amet Oxford University Press in voluptate velit esse cillum. Iakovos Geiger, Julian Ritter, Asya Lyon, Carina Madsen, Alexandra Häusler, Hanke Mendel, Ranya Eikenboom. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Min Kwok, Jun K., Tu Wong, Qiang Suen, Zhou Mah, Ning Liu, Lei W. Huang, Ru X. Wu"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 17,
|
||||
"sectionNumber": "17",
|
||||
"text": "Rule 19/20: Redacted PII Personal Identification Information based on Dict Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No Naka-27 Aomachi, Nomi, Ishikawa 923-1101, Japan, JP Sude Halide Nurullah Özgür U. Reyhan B. Rahim C. J. Alfred Xinyi Y. Tao Clara Siegfried ← not added to Dict, should be not annotated"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 18,
|
||||
"sectionNumber": "18",
|
||||
"text": "Rule 21/22: Redact Emails by RegEx Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No Duis aute irure dolor in library@outlook.com reprehenderit in voluptate gordonjcp@msn.com velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint dinther@comcast.net occaecat cupidatat non proident, sunt in kawasaki@me.com culpa qui officia deserunt mollit anim id est laborum."
|
||||
},
|
||||
{
|
||||
"sectionNumber": 19,
|
||||
"sectionNumber": "19",
|
||||
"text": "Description Text Contact Point"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 20,
|
||||
"sectionNumber": "20",
|
||||
"text": "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum Contact Point dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Contact point: Central Research Industry Phone: +49 2113 2311 563 Fax: +49 2113 2311 560 Tel.: +81 764770164 Tel: +81 6653 44563 E-mail: Seriknowmobil@co.uk Email: maximiliamschmitt@arcor.de e-mail: maximiliamschmitt@t-online.de E-mail address: example@mail.com Contact: Maximiliam Schmitt Telephone number: +27414328992 Telephone No: +274 1432 8991 Fax number: +274 1432 8990 Telephone: +274 34223331 Phone No. +274 1432 8933 Contact: 493 1223 4592 European contact: European Central Institute Alternative contact: Emilia Lockhart Alternative contact: Cyberdyne Systems Tower Defense 121a Hong Kong, BT District"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 22,
|
||||
"sectionNumber": "22",
|
||||
"text": "Rule 23/24: Redact contact information (contains \"Contact point:\") Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No “Contact-Information was found should be appears”"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 23,
|
||||
"sectionNumber": "23",
|
||||
"text": "Description Text Applicant"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 24,
|
||||
"sectionNumber": "24",
|
||||
"text": "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum Contact Point dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Contact point: Central Research Industry Phone: +49 2113 2311 563 Fax: +49 2113 2311 560 Tel.: +81 764770164 Tel: +81 6653 44563 E-mail: Seriknowmobil@co.uk Email: maximiliamschmitt@arcor.de e-mail: maximiliamschmitt@t-online.de E-mail address: example@mail.com Contact: Maximiliam Schmitt Telephone number: +27414328992 Telephone No: +274 1432 8991 Fax number: +274 1432 8990 Telephone: +274 34223331 Phone No. +274 1432 8933 Contact: 493 1223 4592 European contact: European Central Institute Alternative contact: Emilia Lockhart Alternative contact: Cyberdyne Systems Tower Defense 121a Hong Kong, BT District"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 26,
|
||||
"sectionNumber": "26",
|
||||
"text": "Rule 25/26: Redact contact information (contains \"Applicant\" as Headline or Text) Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No “Applicant Information was found should be appears” Applicant Name: Soylent Corporation Contact point: Riddley Scott Address: 359-21 Huam-dong Yongsan-gu Seoul, South Korea Phone: +82 122 34188 Fax: +82 122 34180 E-mail: food-industry@korea.com Contact: This is a special case, everything between this and the next keyword should be redacted Tel.: +275 5678 1234 132 fsdfdfre frefref"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 27,
|
||||
"sectionNumber": "27",
|
||||
"text": "Rule 27/28: Redact contact Information (contains Producer) Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No “Producer was found” should be appears Producer of the plant production Name: Umbrella Corporation Contact: Jill Valentine Address: 359-21 Huam-dong Yongsan-gu Seoul, South Korea Phone: +82 122 34188 Fax: +82 122 34180 E-mail: pharma-industry@korea.com"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 28,
|
||||
"sectionNumber": "28",
|
||||
"text": "Rule 29/30/31/32: If Text contains \"AUTHORS:\" and \"COMPLETION DATES\" but not \"STUDY COMPLETION DATES\", then Redact between both Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No Study Report___ AUTHOR(S): Dr. Alan Grant COMPLETION DATE: 02 December 1997"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 29,
|
||||
"sectionNumber": "29",
|
||||
"text": "Rule 29/30/31/32: (additional) negative Test for Study completion dates No Redaction should be appears here Study Report___ AUTHOR(S): Dr. Alan Grant STUDY COMPLETION DATE: 02 December 1997"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 30,
|
||||
"sectionNumber": "30",
|
||||
"text": "Rule 33/34: If Text contains \"Performing Lab\" and \"Lab Project ID\", then Redact everything between Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No Study Report___ PERFORMING LABORATORY: Umbrella Corporation LABORATORY PROJECT ID: Number 20201/33991/ERZAT/21"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 31,
|
||||
"sectionNumber": "31",
|
||||
"text": "Rule 35/36/37/38: ?? Tba"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 32,
|
||||
"sectionNumber": "32",
|
||||
"text": "Rule 39: Purity Hint Add Purity as Hint when Percent-Numbers is there Test Item: Soda Purity: 45% ← should be Hint Purity: <45% ← should be Hint Purity: >45% ← should be Hint Purity: 101% ← should ne be Hint because >100 % is not possible Purity: =>45% ← should be not Hint because additional symbols Purity: =<45% ← should be not Hint because additional symbols Purity: aa 45% ← should be not Hint because additional symbols Purity: 45% aa ← should be not Hint because additional symbols Purity: aa45% ← should be not Hint because additional symbols Purity: 45%aa ← should be not Hint because additional symbols Product-Code: EAK-L443 purity: 99% ← not Hint because case sensitive purity: >99% ← not Hint because case sensitive purity: <99% ← not Hint because case sensitive Supplier: GreenForce"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 33,
|
||||
"sectionNumber": "33",
|
||||
"text": "Rule 40: Ignore Dossier-Redaction if Confidentiality is not set Dont redact Dossier-Redaction if Confidentiality is not set in file attributes Excepteur sint occaecat cupidatat non proident, myDossierRedaction sunt in culpa qui officia deserunt mollit anim id est laborum."
|
||||
},
|
||||
{
|
||||
"sectionNumber": 34,
|
||||
"sectionNumber": "34",
|
||||
"text": "Rule 41/42: Redact Signatures Redact when Vertebrate Study is Yes Redact when Vertebrate Study is No __________________________ __________________________ Signed by: Dilara Sonnenschein Signed by: Tobias Müller"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 35,
|
||||
"sectionNumber": "35.1.1.3",
|
||||
"text": "Rule 43: Redact Logo Redact Logo only if Vertebrate Study is Yes, else do nothing (skipped)"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 36,
|
||||
"sectionNumber": "36",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 37,
|
||||
"sectionNumber": "37",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 38,
|
||||
"sectionNumber": "38",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 39,
|
||||
"sectionNumber": "39",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 40,
|
||||
"sectionNumber": "40",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 41,
|
||||
"sectionNumber": "41",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 42,
|
||||
"sectionNumber": "42",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 43,
|
||||
"sectionNumber": "43",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 44,
|
||||
"sectionNumber": "44",
|
||||
"text": "This is a Page-Header"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 45,
|
||||
"sectionNumber": "45",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 46,
|
||||
"sectionNumber": "46",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 47,
|
||||
"sectionNumber": "47",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 48,
|
||||
"sectionNumber": "48",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 49,
|
||||
"sectionNumber": "49",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 50,
|
||||
"sectionNumber": "50",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 51,
|
||||
"sectionNumber": "51",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 52,
|
||||
"sectionNumber": "52",
|
||||
"text": "This is a Page-Footer"
|
||||
},
|
||||
{
|
||||
"sectionNumber": 53,
|
||||
"sectionNumber": "53",
|
||||
"text": "This is a Page-Footer"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user