Deprecated the RSSPoc2Service class and removed the sentence splitting.

Instead of creating a component per sentence, the system now creates one component per entity.
This commit is contained in:
Kresnadi Budisantoso 2023-09-29 19:34:51 +02:00
parent 2d4b32293f
commit fc8b51603c

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.report.v1.server.service;
import java.text.BreakIterator;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
@ -32,6 +31,7 @@ import lombok.SneakyThrows;
@Service
@RequiredArgsConstructor
@Deprecated(forRemoval = true)
public class RSSPoc2Service {
public static final String STUDY_TYPE_NUMBER = "Study_Type_Number";
@ -45,7 +45,7 @@ public class RSSPoc2Service {
public static final String TEST_GUIDELINE_1 = "Test_Guidelines_1";
public static final String TEST_GUIDELINE_2 = "Test_Guidelines_2";
public static final String STUDY_CONCLUSION = "Study_Conclusion";
public static final String DEVIATION_FROM_THE_GUIDELINE_SENTENCE = "Deviation_from_the_Guideline_";
public static final String DEVIATION_FROM_THE_GUIDELINE = "Deviation_from_the_Guideline";
public static final String SPECIES = "Species";
public static final String STRAIN = "Strain";
public static final String CONCLUSION_LD_50_MG_PER_MG = "Conclusion_LD50_mg_per_kg";
@ -53,24 +53,23 @@ public class RSSPoc2Service {
public static final String CONCLUSION_MINIMUM_CONFIDENCE = "Conclusion_Minimum_Confidence";
public static final String CONCLUSION_MAXIMUM_CONFIDENCE = "Conclusion_Maximum_Confidence";
public static final String NECROPSY_FINDINGS = "Necropsy_Findings";
public static final String NECROPSY_FINDINGS_SENTENCE = "Necropsy_Findings_";
public static final String CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE = "Conducted_with_4_Hours_of_Exposure";
public static final String STUDY_DESIGN = "Study_Design";
public static final String RESULTS_AND_CONCLUSIONS = "Results_and_Conclusions";
public static final String WEIGHT_BEHAVIOR_CHANGES_SENTENCE = "Weight_Behavior_Changes_";
public static final String WEIGHT_BEHAVIOR_CHANGES = "Weight_Behavior_Changes";
public static final String MORTALITY_STATEMENT = "Mortality_Statement";
public static final String CLINICAL_OBSERVATIONS_SENTENCE = "Clinical_Observations_";
public static final String BODY_WEIGHT_CHANGES_SENTENCE = "Body_Weight_Changes_";
public static final String CLINICAL_OBSERVATIONS = "Clinical_Observations";
public static final String BODY_WEIGHT_CHANGES = "Body_Weight_Changes";
public static final String DETAILING_OF_REPORTED_CHANGES = "Detailing_of_Reported_Changes";
public static final String SEX = "Sex";
public static final String NUMBER_OF_ANIMALS = "Number_of_Animals";
public static final String CLINCAL_SIGNS_SENTENCE = "Clincal_Signs_";
public static final String CLINCAL_SIGNS = "Clincal_Signs";
public static final String MORTALITY = "Mortality";
public static final String DOSAGES = "Dosages";
public static final String PRELIMINARY_TEST_RESULTS_SENTENCE = "Preliminary_Test_Results_";
public static final String PRELIMINARY_TEST_RESULTS = "Preliminary_Test_Results";
public static final String TEST_RESULTS = "Test_Results";
public static final String WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL = "Was_the_definitive_study_conducted_with_positive_control";
public static final String DOSE_MORTALITY_SENTENCE = "Dose_Mortality_";
public static final String DOSE_MORTALITY = "Dose_Mortality";
public static final String RESULTS_MAIN_STUDY = "Results_Main_Study";
public static final String WHAT_WAS_THE_APPROACH_USED = "What_was_the_approach_used";
public static final String DOSES_MG_PER_KG_BW = "Doses_mg_per_kg_bw";
@ -157,7 +156,6 @@ public class RSSPoc2Service {
continue;
}
resultMap.put(STUDY_TITLE, getFirstEntryOrElse(redactionLog, "title", ""));
resultMap.put(REPORT_NUMBER, getFirstEntryOrElse(redactionLog, "report_number", ""));
resultMap.put(PERFORMING_LABORATORY, getPerformingLaboratory(redactionLog));
@ -172,16 +170,7 @@ public class RSSPoc2Service {
resultMap.put(STUDY_CONCLUSION, combineValuesOfFirstFoundSection(redactionLog, "study_conclusion", " ", ""));
var guidelineDeviationSentences = getAsSentences(redactionLog, "guideline_deviation");
int i = 1;
for (SCMComponent guidelineDeviationSentence : guidelineDeviationSentences) {
resultMap.put(DEVIATION_FROM_THE_GUIDELINE_SENTENCE + i, guidelineDeviationSentence);
i++;
}
if (guidelineDeviationSentences.isEmpty()) {
resultMap.put(DEVIATION_FROM_THE_GUIDELINE_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "guideline_deviation")).build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "guideline_deviation", DEVIATION_FROM_THE_GUIDELINE));
}
if (oecdIn(oecd, Set.of("402", "403", "404", "405", "425", "429", "436", "471"))) {
@ -204,118 +193,60 @@ public class RSSPoc2Service {
if (oecdIn(oecd, Set.of("403", "436"))) {
var sentences = getAsSentences(redactionLog, "necropsy_findings");
int i = 1;
for (SCMComponent sentence : sentences) {
resultMap.put(NECROPSY_FINDINGS_SENTENCE + i, sentence);
i++;
}
if (sentences.isEmpty()) {
resultMap.put(NECROPSY_FINDINGS_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "necropsy_findings")).build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "necropsy_findings", NECROPSY_FINDINGS));
}
if (oecdIn(oecd, Set.of("403", "436"))) {
resultMap.put(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE, getAsOneBlock(redactionLog, "4h_exposure"));
}
if (oecdIn(oecd, Set.of("404", "405", "429", "406", "428", "438", "439", "474", "487"))) {
resultMap.put(STUDY_DESIGN, getAsOneBlock(redactionLog, "study_design"));
}
if (oecdIn(oecd, Set.of("406", "428", "438", "439", "474", "487"))) {
resultMap.put(RESULTS_AND_CONCLUSIONS, getJoinedValues(redactionLog, "results_and_conclusion", " "));
}
if (oecdIn(oecd, Set.of("402"))) {
var weightBehaviorChangesSentences = getAsSentences(redactionLog, "weight_behavior_changes");
int i = 1;
for (SCMComponent sentence : weightBehaviorChangesSentences) {
resultMap.put(WEIGHT_BEHAVIOR_CHANGES_SENTENCE + i, sentence);
i++;
}
if (weightBehaviorChangesSentences.isEmpty()) {
resultMap.put(WEIGHT_BEHAVIOR_CHANGES_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "weight_behavior_changes")).build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "weight_behavior_changes", WEIGHT_BEHAVIOR_CHANGES));
resultMap.put(MORTALITY_STATEMENT, getAsOneBlock(redactionLog, "mortality_statement"));
}
if (oecdIn(oecd, Set.of("403"))) {
var sentences = getAsSentences(redactionLog, "clinical_observations");
int i = 1;
for (SCMComponent sentence : sentences) {
resultMap.put(CLINICAL_OBSERVATIONS_SENTENCE + i, sentence);
i++;
}
if (sentences.isEmpty()) {
resultMap.put(CLINICAL_OBSERVATIONS_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "clinical_observations")).build());
}
sentences = getAsSentences(redactionLog, "bodyweight_changes");
i = 1;
for (SCMComponent sentence : sentences) {
resultMap.put(BODY_WEIGHT_CHANGES_SENTENCE + i, sentence);
i++;
}
if (sentences.isEmpty()) {
resultMap.put(BODY_WEIGHT_CHANGES_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "bodyweight_changes")).build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "clinical_observations", CLINICAL_OBSERVATIONS));
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "bodyweight_changes", BODY_WEIGHT_CHANGES));
}
if (oecdIn(oecd, Set.of("404", "405"))) {
resultMap.put(DETAILING_OF_REPORTED_CHANGES, getAsOneBlock(redactionLog, "detailing"));
}
if (oecdIn(oecd, Set.of("405", "429"))) {
resultMap.put(SEX, getSex(redactionLog));
resultMap.put(NUMBER_OF_ANIMALS, getNumberOfAnimals(redactionLog));
}
if (oecdIn(oecd, Set.of("425"))) {
var clinicalSignsSentences = getAsSentences(redactionLog, "clinical_signs");
int i = 1;
for (SCMComponent clinicalSignsSentence : clinicalSignsSentences) {
resultMap.put(CLINCAL_SIGNS_SENTENCE + i, clinicalSignsSentence);
i++;
}
if (clinicalSignsSentences.isEmpty()) {
resultMap.put(CLINCAL_SIGNS_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "clinical_signs")).build());
}
var doseMortalitySentences = getDoseMortality(redactionLog);
i = 1;
for (SCMComponent sentence : doseMortalitySentences) {
resultMap.put(DOSE_MORTALITY_SENTENCE + i, sentence);
i++;
}
if (doseMortalitySentences.isEmpty()) {
resultMap.put(DOSE_MORTALITY_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation("Combine values of 'dose_mortality' and 'dose_mortality_dose' of same row with ', ' ").build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "clinical_signs", CLINCAL_SIGNS));
resultMap.putAll(getDoseMortality(redactionLog, DOSE_MORTALITY));
resultMap.put(MORTALITY, getAsOneBlock(redactionLog, "mortality"));
resultMap.put(DOSAGES, getFirstEntryOrElse(redactionLog, "dosages", ""));
}
if (oecdIn(oecd, Set.of("429"))) {
var sentences = getAsSentences(redactionLog, "preliminary_test_results");
int i = 1;
for (SCMComponent sentence : sentences) {
resultMap.put(PRELIMINARY_TEST_RESULTS_SENTENCE + i, sentence);
i++;
}
if (sentences.isEmpty()) {
resultMap.put(PRELIMINARY_TEST_RESULTS_SENTENCE + 1,
SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "preliminary_test_results")).build());
}
resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "preliminary_test_results", PRELIMINARY_TEST_RESULTS));
resultMap.put(TEST_RESULTS, getAsOneBlock(redactionLog, "test_results"));
resultMap.put(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL, getAsOneBlock(redactionLog, "positive_control"));
@ -336,6 +267,34 @@ public class RSSPoc2Service {
}
/**
 * Turns a list of components into result-map entries under the given key.
 * <ul>
 * <li>empty list: one entry {@code componentKey} holding an empty placeholder component
 * that carries the transformation description</li>
 * <li>one element: one entry {@code componentKey} holding that element</li>
 * <li>several elements: one entry per element, keyed {@code componentKey_1},
 * {@code componentKey_2}, ... in list order</li>
 * </ul>
 *
 * @param components     components to expose in the result map
 * @param componentKey   key (or key prefix, when there are several components) of the entries
 * @param transformation description stored on the placeholder component when the list is empty
 * @return a single-entry map created via {@code Map.of} for zero or one component,
 * otherwise a {@link LinkedHashMap} preserving list order
 */
private static Map<String, SCMComponent> componentListToMap(List<SCMComponent> components, String componentKey, String transformation) {
    if (components.isEmpty()) {
        return Map.of(componentKey, getEmptyComponent(transformation));
    }
    if (components.size() == 1) {
        // A single component keeps the plain key, without a numeric suffix.
        return Map.of(componentKey, components.get(0));
    }
    // Several components: suffix the key with a 1-based index so each one gets its own entry.
    Map<String, SCMComponent> results = new LinkedHashMap<>();
    int index = 1;
    for (SCMComponent component : components) {
        results.put(String.format("%s_%s", componentKey, index++), component);
    }
    return results;
}
/**
 * Builds a placeholder component with an empty original value.
 *
 * @param transformation transformation description to store on the placeholder
 * @return a component whose original value is the empty string
 */
private static SCMComponent getEmptyComponent(String transformation) {
    final String noValue = "";
    return SCMComponent.builder()
            .originalValue(noValue)
            .transformation(transformation)
            .build();
}
private Map<String, SCMComponent> getAllEntities(RedactionLog redactionLog){
Map<String, SCMComponent> resultMap = new HashMap<>();
@ -423,7 +382,7 @@ public class RSSPoc2Service {
var laboratoryEntry = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals("laboratory_name")).findFirst();
if (!laboratoryEntry.isPresent()) {
return SCMComponent.builder().originalValue("").transformation(transformation).build();
return getEmptyComponent(transformation);
}
var laboratoryCountry = redactionLog.getRedactionLogEntry()
@ -540,7 +499,7 @@ public class RSSPoc2Service {
.build();
}
return SCMComponent.builder().originalValue("").transformation(transformation).build();
return getEmptyComponent(transformation);
}
@ -562,7 +521,7 @@ public class RSSPoc2Service {
.build();
}
return SCMComponent.builder().originalValue("").transformation(transformation).build();
return getEmptyComponent(transformation);
}
@ -572,34 +531,23 @@ public class RSSPoc2Service {
}
private List<SCMComponent> getAsSentences(RedactionLog redactionLog, String type) {
private Map<String, SCMComponent> getAsBlockPerAnnotation(RedactionLog redactionLog, String type, String componentKey) {
String transformation = String.format("Values of type '%s' as sentences", type);
String transformation = String.format("Values of type '%s'", type);
List<SCMComponent> sentences = new ArrayList<>();
List<SCMComponent> scmComponents = new ArrayList<>();
var typeStringsEntries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).collect(Collectors.toList());
if (typeStringsEntries.isEmpty()) {
return sentences;
}
for (RedactionLogEntry typeStringEntry : typeStringsEntries) {
BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
iterator.setText(typeStringEntry.getValue());
int start = iterator.first();
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
sentences.add(SCMComponent.builder()
.originalValue(typeStringEntry.getValue().substring(start, end).replaceAll("\\n", "").trim())
.scmAnnotations(List.of(toScmAnnotations(typeStringEntry)))
.transformation(transformation)
.build());
}
scmComponents.add(SCMComponent.builder()
.originalValue(typeStringEntry.getValue().replaceAll("\\n", "").trim())
.scmAnnotations(List.of(toScmAnnotations(typeStringEntry)))
.transformation(transformation)
.build());
}
return sentences;
return componentListToMap(scmComponents, componentKey, transformation);
}
@ -707,7 +655,7 @@ public class RSSPoc2Service {
var uniqueAnimalNumbers = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals("animal_number")).collect(Collectors.toSet());
if (uniqueAnimalNumbers.isEmpty()) {
return SCMComponent.builder().originalValue("").transformation(transformation).build();
return getEmptyComponent(transformation);
}
int size = uniqueAnimalNumbers.stream().map(a -> a.getValue()).collect(Collectors.toSet()).size();
@ -720,7 +668,7 @@ public class RSSPoc2Service {
}
private List<SCMComponent> getDoseMortality(RedactionLog redactionLog) {
private Map<String, SCMComponent> getDoseMortality(RedactionLog redactionLog, String componentKey) {
String transformation = "Combine values of 'dose_mortality' and 'dose_mortality_dose' of same row with ', ' ";
@ -764,7 +712,7 @@ public class RSSPoc2Service {
}
return result;
return componentListToMap(result, componentKey, transformation);
}
@ -802,9 +750,9 @@ public class RSSPoc2Service {
resultMap.put(STRAIN, components.get(STRAIN));
resultMap.put(DOSAGES, components.get(DOSAGES));
resultMap.put(MORTALITY, components.get(MORTALITY));
resultMap.putAll(getKeyContains(components, DOSE_MORTALITY_SENTENCE));
resultMap.putAll(getKeyContains(components, CLINCAL_SIGNS_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DOSE_MORTALITY));
resultMap.putAll(getKeyContains(components, CLINCAL_SIGNS));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN));
resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG));
resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE));
@ -816,9 +764,9 @@ public class RSSPoc2Service {
resultMap.put(STRAIN, components.get(STRAIN));
resultMap.put(DOSES_MG_PER_KG_BW, components.get(DOSES_MG_PER_KG_BW));
resultMap.put(MORTALITY_STATEMENT, components.get(MORTALITY_STATEMENT));
resultMap.putAll(getKeyContains(components, WEIGHT_BEHAVIOR_CHANGES_SENTENCE));
resultMap.putAll(getKeyContains(components, WEIGHT_BEHAVIOR_CHANGES));
resultMap.put(NECROPSY_FINDINGS, components.get(NECROPSY_FINDINGS));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN));
resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG));
resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE));
@ -830,10 +778,10 @@ public class RSSPoc2Service {
resultMap.put(SPECIES, components.get(SPECIES));
resultMap.put(STRAIN, components.get(STRAIN));
resultMap.put(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE, components.get(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE));
resultMap.putAll(getKeyContains(components, CLINICAL_OBSERVATIONS_SENTENCE));
resultMap.putAll(getKeyContains(components, BODY_WEIGHT_CHANGES_SENTENCE));
resultMap.putAll(getKeyContains(components, NECROPSY_FINDINGS_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, CLINICAL_OBSERVATIONS));
resultMap.putAll(getKeyContains(components, BODY_WEIGHT_CHANGES));
resultMap.putAll(getKeyContains(components, NECROPSY_FINDINGS));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN));
resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG));
resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE));
@ -847,7 +795,7 @@ public class RSSPoc2Service {
resultMap.put(NUMBER_OF_ANIMALS, components.get(NUMBER_OF_ANIMALS));
resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN));
resultMap.put(DETAILING_OF_REPORTED_CHANGES, components.get(DETAILING_OF_REPORTED_CHANGES));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION));
break;
case "429":
@ -856,12 +804,12 @@ public class RSSPoc2Service {
resultMap.put(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL, components.get(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL));
resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN));
resultMap.put(RESULTS_MAIN_STUDY, components.get(RESULTS_MAIN_STUDY));
resultMap.putAll(getKeyContains(components, PRELIMINARY_TEST_RESULTS_SENTENCE));
resultMap.putAll(getKeyContains(components, PRELIMINARY_TEST_RESULTS));
resultMap.put(WHAT_WAS_THE_APPROACH_USED, components.get(WHAT_WAS_THE_APPROACH_USED));
resultMap.put(SEX, components.get(SEX));
resultMap.put(NUMBER_OF_ANIMALS, components.get(NUMBER_OF_ANIMALS));
resultMap.put(TEST_RESULTS, components.get(TEST_RESULTS));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION));
break;
case "404":
@ -869,12 +817,12 @@ public class RSSPoc2Service {
resultMap.put(STRAIN, components.get(STRAIN));
resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN));
resultMap.put(DETAILING_OF_REPORTED_CHANGES, components.get(DETAILING_OF_REPORTED_CHANGES));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION));
break;
case "471":
resultMap.put(STRAIN, components.get(STRAIN));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE));
resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE));
resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION));
break;
}