From fc8b51603c3d9f87eff3261ebfed23ea2ae06534 Mon Sep 17 00:00:00 2001 From: Kresnadi Budisantoso Date: Fri, 29 Sep 2023 19:34:51 +0200 Subject: [PATCH] Deprecated class and removed the sentence splitting. Instead of creating a component per sentence, the system now creates one component per entity. --- .../v1/server/service/RSSPoc2Service.java | 212 +++++++----------- 1 file changed, 80 insertions(+), 132 deletions(-) diff --git a/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java b/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java index 97d26c9..9b914f0 100644 --- a/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java +++ b/redaction-report-service-v1/redaction-report-service-server-v1/src/main/java/com/iqser/red/service/redaction/report/v1/server/service/RSSPoc2Service.java @@ -1,6 +1,5 @@ package com.iqser.red.service.redaction.report.v1.server.service; -import java.text.BreakIterator; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.*; @@ -32,6 +31,7 @@ import lombok.SneakyThrows; @Service @RequiredArgsConstructor +@Deprecated(forRemoval = true) public class RSSPoc2Service { public static final String STUDY_TYPE_NUMBER = "Study_Type_Number"; @@ -45,7 +45,7 @@ public class RSSPoc2Service { public static final String TEST_GUIDELINE_1 = "Test_Guidelines_1"; public static final String TEST_GUIDELINE_2 = "Test_Guidelines_2"; public static final String STUDY_CONCLUSION = "Study_Conclusion"; - public static final String DEVIATION_FROM_THE_GUIDELINE_SENTENCE = "Deviation_from_the_Guideline_"; + public static final String DEVIATION_FROM_THE_GUIDELINE = "Deviation_from_the_Guideline"; public static final String SPECIES = "Species"; public static final String STRAIN = "Strain"; public static final String CONCLUSION_LD_50_MG_PER_MG = "Conclusion_LD50_mg_per_kg"; @@ -53,24 +53,23 @@ public class RSSPoc2Service { public static final String CONCLUSION_MINIMUM_CONFIDENCE = "Conclusion_Minimum_Confidence"; public static final String CONCLUSION_MAXIMUM_CONFIDENCE = "Conclusion_Maximum_Confidence"; public static final String NECROPSY_FINDINGS = "Necropsy_Findings"; - public static final String NECROPSY_FINDINGS_SENTENCE = "Necropsy_Findings_"; public static final String CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE = "Conducted_with_4_Hours_of_Exposure"; public static final String STUDY_DESIGN = "Study_Design"; public static final String RESULTS_AND_CONCLUSIONS = "Results_and_Conclusions"; - public static final String WEIGHT_BEHAVIOR_CHANGES_SENTENCE = "Weight_Behavior_Changes_"; + public static final String WEIGHT_BEHAVIOR_CHANGES = "Weight_Behavior_Changes"; public static final String MORTALITY_STATEMENT = "Mortality_Statement"; - public static final String CLINICAL_OBSERVATIONS_SENTENCE = "Clinical_Observations_"; - public static final String BODY_WEIGHT_CHANGES_SENTENCE = "Body_Weight_Changes_"; + public static final String CLINICAL_OBSERVATIONS = "Clinical_Observations"; + public static final String BODY_WEIGHT_CHANGES = "Body_Weight_Changes"; public static final String DETAILING_OF_REPORTED_CHANGES = "Detailing_of_Reported_Changes"; public static final String SEX = "Sex"; public static final String NUMBER_OF_ANIMALS = "Number_of_Animals"; - public static final String CLINCAL_SIGNS_SENTENCE = "Clincal_Signs_"; + public static final String CLINCAL_SIGNS = "Clincal_Signs"; public static final String MORTALITY = "Mortality"; public static final String DOSAGES = "Dosages"; - public static final String PRELIMINARY_TEST_RESULTS_SENTENCE = "Preliminary_Test_Results_"; + public static final String PRELIMINARY_TEST_RESULTS = "Preliminary_Test_Results"; public static final String TEST_RESULTS = "Test_Results"; public static final String WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL = "Was_the_definitive_study_conducted_with_positive_control"; - public static final String DOSE_MORTALITY_SENTENCE = "Dose_Mortality_"; + public static final String DOSE_MORTALITY = "Dose_Mortality"; public static final String RESULTS_MAIN_STUDY = "Results_Main_Study"; public static final String WHAT_WAS_THE_APPROACH_USED = "What_was_the_approach_used"; public static final String DOSES_MG_PER_KG_BW = "Doses_mg_per_kg_bw"; @@ -157,7 +156,6 @@ public class RSSPoc2Service { continue; } - resultMap.put(STUDY_TITLE, getFirstEntryOrElse(redactionLog, "title", "")); resultMap.put(REPORT_NUMBER, getFirstEntryOrElse(redactionLog, "report_number", "")); resultMap.put(PERFORMING_LABORATORY, getPerformingLaboratory(redactionLog)); @@ -172,16 +170,7 @@ public class RSSPoc2Service { resultMap.put(STUDY_CONCLUSION, combineValuesOfFirstFoundSection(redactionLog, "study_conclusion", " ", "")); - var guidelineDeviationSentences = getAsSentences(redactionLog, "guideline_deviation"); - int i = 1; - for (SCMComponent guidelineDeviationSentence : guidelineDeviationSentences) { - resultMap.put(DEVIATION_FROM_THE_GUIDELINE_SENTENCE + i, guidelineDeviationSentence); - i++; - } - if (guidelineDeviationSentences.isEmpty()) { - resultMap.put(DEVIATION_FROM_THE_GUIDELINE_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "guideline_deviation")).build()); - } + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "guideline_deviation", DEVIATION_FROM_THE_GUIDELINE)); } if (oecdIn(oecd, Set.of("402", "403", "404", "405", "425", "429", "436", "471"))) { @@ -204,118 +193,60 @@ public class RSSPoc2Service { if (oecdIn(oecd, Set.of("403", "436"))) { - var sentences = getAsSentences(redactionLog, "necropsy_findings"); - int i = 1; - for (SCMComponent sentence : sentences) { - resultMap.put(NECROPSY_FINDINGS_SENTENCE + i, sentence); - i++; - } - if (sentences.isEmpty()) { - resultMap.put(NECROPSY_FINDINGS_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "necropsy_findings")).build()); - } + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "necropsy_findings", NECROPSY_FINDINGS)); } if (oecdIn(oecd, Set.of("403", "436"))) { + resultMap.put(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE, getAsOneBlock(redactionLog, "4h_exposure")); } if (oecdIn(oecd, Set.of("404", "405", "429", "406", "428", "438", "439", "474", "487"))) { + resultMap.put(STUDY_DESIGN, getAsOneBlock(redactionLog, "study_design")); } if (oecdIn(oecd, Set.of("406", "428", "438", "439", "474", "487"))) { + resultMap.put(RESULTS_AND_CONCLUSIONS, getJoinedValues(redactionLog, "results_and_conclusion", " ")); } if (oecdIn(oecd, Set.of("402"))) { - var weightBehaviorChangesSentences = getAsSentences(redactionLog, "weight_behavior_changes"); - int i = 1; - for (SCMComponent sentence : weightBehaviorChangesSentences) { - resultMap.put(WEIGHT_BEHAVIOR_CHANGES_SENTENCE + i, sentence); - i++; - } - if (weightBehaviorChangesSentences.isEmpty()) { - resultMap.put(WEIGHT_BEHAVIOR_CHANGES_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "weight_behavior_changes")).build()); - } + + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "weight_behavior_changes", WEIGHT_BEHAVIOR_CHANGES)); resultMap.put(MORTALITY_STATEMENT, getAsOneBlock(redactionLog, "mortality_statement")); } if (oecdIn(oecd, Set.of("403"))) { - var sentences = getAsSentences(redactionLog, "clinical_observations"); - int i = 1; - for (SCMComponent sentence : sentences) { - resultMap.put(CLINICAL_OBSERVATIONS_SENTENCE + i, sentence); - i++; - } - if (sentences.isEmpty()) { - resultMap.put(CLINICAL_OBSERVATIONS_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "clinical_observations")).build()); - } - - sentences = getAsSentences(redactionLog, "bodyweight_changes"); - i = 1; - for (SCMComponent sentence : sentences) { - resultMap.put(BODY_WEIGHT_CHANGES_SENTENCE + i, sentence); - i++; - } - if (sentences.isEmpty()) { - resultMap.put(BODY_WEIGHT_CHANGES_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "bodyweight_changes")).build()); - } + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "clinical_observations", CLINICAL_OBSERVATIONS)); + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "bodyweight_changes", BODY_WEIGHT_CHANGES)); } if (oecdIn(oecd, Set.of("404", "405"))) { + resultMap.put(DETAILING_OF_REPORTED_CHANGES, getAsOneBlock(redactionLog, "detailing")); } if (oecdIn(oecd, Set.of("405", "429"))) { + resultMap.put(SEX, getSex(redactionLog)); resultMap.put(NUMBER_OF_ANIMALS, getNumberOfAnimals(redactionLog)); } if (oecdIn(oecd, Set.of("425"))) { - var clinicalSignsSentences = getAsSentences(redactionLog, "clinical_signs"); - int i = 1; - for (SCMComponent clinicalSignsSentence : clinicalSignsSentences) { - resultMap.put(CLINCAL_SIGNS_SENTENCE + i, clinicalSignsSentence); - i++; - } - if (clinicalSignsSentences.isEmpty()) { - resultMap.put(CLINCAL_SIGNS_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "clinical_signs")).build()); - } - - var doseMortalitySentences = getDoseMortality(redactionLog); - i = 1; - for (SCMComponent sentence : doseMortalitySentences) { - resultMap.put(DOSE_MORTALITY_SENTENCE + i, sentence); - i++; - } - if (doseMortalitySentences.isEmpty()) { - resultMap.put(DOSE_MORTALITY_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation("Combine values of 'dose_mortality' and 'dose_mortality_dose' of same row with ', ' ").build()); - } + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "clinical_signs", CLINCAL_SIGNS)); + resultMap.putAll(getDoseMortality(redactionLog, DOSE_MORTALITY)); resultMap.put(MORTALITY, getAsOneBlock(redactionLog, "mortality")); resultMap.put(DOSAGES, getFirstEntryOrElse(redactionLog, "dosages", "")); } if (oecdIn(oecd, Set.of("429"))) { - var sentences = getAsSentences(redactionLog, "preliminary_test_results"); - int i = 1; - for (SCMComponent sentence : sentences) { - resultMap.put(PRELIMINARY_TEST_RESULTS_SENTENCE + i, sentence); - i++; - } - if (sentences.isEmpty()) { - resultMap.put(PRELIMINARY_TEST_RESULTS_SENTENCE + 1, - SCMComponent.builder().originalValue("").transformation(String.format("Values of type '%s' as sentences", "preliminary_test_results")).build()); - } + + resultMap.putAll(getAsBlockPerAnnotation(redactionLog, "preliminary_test_results", PRELIMINARY_TEST_RESULTS)); resultMap.put(TEST_RESULTS, getAsOneBlock(redactionLog, "test_results")); resultMap.put(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL, getAsOneBlock(redactionLog, "positive_control")); @@ -336,6 +267,34 @@ public class RSSPoc2Service { } + private static Map componentListToMap(List clinicalSignsList, String componentKey, String transformation) { + + if (clinicalSignsList.size() == 0) { + + return Map.of(componentKey, getEmptyComponent(transformation)); + } + + if (clinicalSignsList.size() == 1) { + + return Map.of(componentKey, clinicalSignsList.get(0)); + } + + int i = 1; + Map results = new LinkedHashMap<>(); + for (SCMComponent block : clinicalSignsList) { + + results.put(String.format("%s_%s", componentKey, i++), block); + } + + return results; + } + + + private static SCMComponent getEmptyComponent(String transformation) { + + return SCMComponent.builder().originalValue("").transformation(transformation).build(); + } + private Map getAllEntities(RedactionLog redactionLog){ Map resultMap = new HashMap<>(); @@ -423,7 +382,7 @@ public class RSSPoc2Service { var laboratoryEntry = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals("laboratory_name")).findFirst(); if (!laboratoryEntry.isPresent()) { - return SCMComponent.builder().originalValue("").transformation(transformation).build(); + return getEmptyComponent(transformation); } var laboratoryCountry = redactionLog.getRedactionLogEntry() @@ -540,7 +499,7 @@ public class RSSPoc2Service { .build(); } - return SCMComponent.builder().originalValue("").transformation(transformation).build(); + return getEmptyComponent(transformation); } @@ -562,7 +521,7 @@ public class RSSPoc2Service { .build(); } - return SCMComponent.builder().originalValue("").transformation(transformation).build(); + return getEmptyComponent(transformation); } @@ -572,34 +531,23 @@ public class RSSPoc2Service { } - private List getAsSentences(RedactionLog redactionLog, String type) { + private Map getAsBlockPerAnnotation(RedactionLog redactionLog, String type, String componentKey) { - String transformation = String.format("Values of type '%s' as sentences", type); + String transformation = String.format("Values of type '%s'", type); - List sentences = new ArrayList<>(); + List scmComponents = new ArrayList<>(); var typeStringsEntries = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals(type)).collect(Collectors.toList()); - if (typeStringsEntries.isEmpty()) { - return sentences; - } - for (RedactionLogEntry typeStringEntry : typeStringsEntries) { - BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); - iterator.setText(typeStringEntry.getValue()); - int start = iterator.first(); - for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { - - sentences.add(SCMComponent.builder() - .originalValue(typeStringEntry.getValue().substring(start, end).replaceAll("\\n", "").trim()) - .scmAnnotations(List.of(toScmAnnotations(typeStringEntry))) - .transformation(transformation) - .build()); - - } + scmComponents.add(SCMComponent.builder() + .originalValue(typeStringEntry.getValue().replaceAll("\\n", "").trim()) + .scmAnnotations(List.of(toScmAnnotations(typeStringEntry))) + .transformation(transformation) + .build()); } - return sentences; + return componentListToMap(scmComponents, componentKey, transformation); } @@ -707,7 +655,7 @@ public class RSSPoc2Service { var uniqueAnimalNumbers = redactionLog.getRedactionLogEntry().stream().filter(r -> r.getType().equals("animal_number")).collect(Collectors.toSet()); if (uniqueAnimalNumbers.isEmpty()) { - return SCMComponent.builder().originalValue("").transformation(transformation).build(); + return getEmptyComponent(transformation); } int size = uniqueAnimalNumbers.stream().map(a -> a.getValue()).collect(Collectors.toSet()).size(); @@ -720,7 +668,7 @@ public class RSSPoc2Service { } - private List getDoseMortality(RedactionLog redactionLog) { + private Map getDoseMortality(RedactionLog redactionLog, String componentKey) { String transformation = "Combine values of 'dose_mortality' and 'dose_mortality_dose' of same row with ', ' "; @@ -764,7 +712,7 @@ public class RSSPoc2Service { } - return result; + return componentListToMap(result, componentKey, transformation); } @@ -802,9 +750,9 @@ public class RSSPoc2Service { resultMap.put(STRAIN, components.get(STRAIN)); resultMap.put(DOSAGES, components.get(DOSAGES)); resultMap.put(MORTALITY, components.get(MORTALITY)); - resultMap.putAll(getKeyContains(components, DOSE_MORTALITY_SENTENCE)); - resultMap.putAll(getKeyContains(components, CLINCAL_SIGNS_SENTENCE)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DOSE_MORTALITY)); + resultMap.putAll(getKeyContains(components, CLINCAL_SIGNS)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN)); resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG)); resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE)); @@ -816,9 +764,9 @@ public class RSSPoc2Service { resultMap.put(STRAIN, components.get(STRAIN)); resultMap.put(DOSES_MG_PER_KG_BW, components.get(DOSES_MG_PER_KG_BW)); resultMap.put(MORTALITY_STATEMENT, components.get(MORTALITY_STATEMENT)); - resultMap.putAll(getKeyContains(components, WEIGHT_BEHAVIOR_CHANGES_SENTENCE)); + resultMap.putAll(getKeyContains(components, WEIGHT_BEHAVIOR_CHANGES)); resultMap.put(NECROPSY_FINDINGS, components.get(NECROPSY_FINDINGS)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN)); resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG)); resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE)); @@ -830,10 +778,10 @@ public class RSSPoc2Service { resultMap.put(SPECIES, components.get(SPECIES)); resultMap.put(STRAIN, components.get(STRAIN)); resultMap.put(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE, components.get(CONDUCTED_WITHIN_4_HOURS_OF_EXPOSURE)); - resultMap.putAll(getKeyContains(components, CLINICAL_OBSERVATIONS_SENTENCE)); - resultMap.putAll(getKeyContains(components, BODY_WEIGHT_CHANGES_SENTENCE)); - resultMap.putAll(getKeyContains(components, NECROPSY_FINDINGS_SENTENCE)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, CLINICAL_OBSERVATIONS)); + resultMap.putAll(getKeyContains(components, BODY_WEIGHT_CHANGES)); + resultMap.putAll(getKeyContains(components, NECROPSY_FINDINGS)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(CONCLUSION_LD_50_GREATER_THAN, components.get(CONCLUSION_LD_50_GREATER_THAN)); resultMap.put(CONCLUSION_LD_50_MG_PER_MG, components.get(CONCLUSION_LD_50_MG_PER_MG)); resultMap.put(CONCLUSION_MINIMUM_CONFIDENCE, components.get(CONCLUSION_MINIMUM_CONFIDENCE)); @@ -847,7 +795,7 @@ public class RSSPoc2Service { resultMap.put(NUMBER_OF_ANIMALS, components.get(NUMBER_OF_ANIMALS)); resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN)); resultMap.put(DETAILING_OF_REPORTED_CHANGES, components.get(DETAILING_OF_REPORTED_CHANGES)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION)); break; case "429": @@ -856,12 +804,12 @@ public class RSSPoc2Service { resultMap.put(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL, components.get(WAS_THE_DEFINITIVE_STUDY_CONDUCTED_WITH_POSITIVE_CONTROL)); resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN)); resultMap.put(RESULTS_MAIN_STUDY, components.get(RESULTS_MAIN_STUDY)); - resultMap.putAll(getKeyContains(components, PRELIMINARY_TEST_RESULTS_SENTENCE)); + resultMap.putAll(getKeyContains(components, PRELIMINARY_TEST_RESULTS)); resultMap.put(WHAT_WAS_THE_APPROACH_USED, components.get(WHAT_WAS_THE_APPROACH_USED)); resultMap.put(SEX, components.get(SEX)); resultMap.put(NUMBER_OF_ANIMALS, components.get(NUMBER_OF_ANIMALS)); resultMap.put(TEST_RESULTS, components.get(TEST_RESULTS)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION)); break; case "404": @@ -869,12 +817,12 @@ public class RSSPoc2Service { resultMap.put(STRAIN, components.get(STRAIN)); resultMap.put(STUDY_DESIGN, components.get(STUDY_DESIGN)); resultMap.put(DETAILING_OF_REPORTED_CHANGES, components.get(DETAILING_OF_REPORTED_CHANGES)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION)); break; case "471": resultMap.put(STRAIN, components.get(STRAIN)); - resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE_SENTENCE)); + resultMap.putAll(getKeyContains(components, DEVIATION_FROM_THE_GUIDELINE)); resultMap.put(STUDY_CONCLUSION, components.get(STUDY_CONCLUSION)); break; }