From 07b12f59eb88065c16aae7751ee574ba37c639e9 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Thu, 13 Nov 2025 10:58:12 +0200 Subject: [PATCH 01/52] scenario file --- .../nurses_scenario_analyses.py | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 src/scripts/nurses_analyses/nurses_scenario_analyses.py diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py new file mode 100644 index 0000000000..b61e5fa44d --- /dev/null +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -0,0 +1,158 @@ +""" +This scenario file sets up the scenarios for simulating the effects of nursing staffing levels +The scenario +0- Baseline scenario +1- +2- + + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class StaffingScenario(BaseScenario): + def __init__(self): + super().__init__() + self.seed=12 + self.start_date=Date(2010, 1, 1) + self.end_date=Date(2030, 1, 1) + self.initial_population_size=200 + self.number_of_draws=2 + self.runs_per_draw=2 + + def log_configuration(self): + return { + 'filename': 'nurses_scenario_outputs', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return fullmodel(resourcefilepath=self.resources) + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + + def draw_parameters(self, draw_number, rng): + if 
draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + return { + "Baseline": + mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "ResourceFile_HR_scaling_by_level_and_officer_type": "default", + "year_mode_switch": 2025, + "mode_appt_constraints_postSwitch": 2, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "year_cons_availability_switch": 2025, + "cons_availability_postSwitch": "all", + }, + } + ), + + "Improved Staffing": + mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "ResourceFile_HR_scaling_by_level_and_officer_type": "default", + "year_mode_switch": 2025, + "mode_appt_constraints_postSwitch": 2, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "funded_plus", + "year_cons_availability_switch": 2025, + "cons_availability_postSwitch": "all", + }, + } + ), + + "Worst-case Scenario": + mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "GDP_growth_fHE_case1", + "year_mode_switch": 2019, + "mode_appt_constraints_postSwitch": 2, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "year_cons_availability_switch": 2019, + "cons_availability_postSwitch": "all", + }, + } + ), + + "Demand Sensitivity": + mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "GDP_growth_fHE_case3", + "year_mode_switch": 2019, + 
"mode_appt_constraints_postSwitch": 2, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "year_cons_availability_switch": 2019, + "cons_availability_postSwitch": "all", + }, + } + ), + +#Look into doing sensitivity analyses in the model + "Time appointments Sensitivity": + mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "GDP_growth_FL_case2_const_tot_i", + "year_mode_switch": 2019, + "mode_appt_constraints_postSwitch": 2, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "year_cons_availability_switch": 2019, + "cons_availability_postSwitch": "all", + }, + } + ), + } + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) From 6617d49e5a984ab71f892e13e240e60af213306a Mon Sep 17 00:00:00 2001 From: PempheroM Date: Thu, 20 Nov 2025 10:14:45 +0200 Subject: [PATCH 02/52] changes_yearmode,hr scaling --- .../nurses_analyses/nurses_scenario_analyses.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index b61e5fa44d..472726bc59 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -61,7 +61,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { "ResourceFile_HR_scaling_by_level_and_officer_type": "default", - "year_mode_switch": 2025, + "year_mode_switch": 2020, "mode_appt_constraints_postSwitch": 2, "scale_to_effective_capabilities": True, "policy_name": "Naive", @@ -80,7 +80,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 
"ResourceFile_HR_scaling_by_level_and_officer_type": "default", - "year_mode_switch": 2025, + "year_mode_switch": 2020, "mode_appt_constraints_postSwitch": 2, "scale_to_effective_capabilities": True, "policy_name": "Naive", @@ -98,8 +98,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: get_parameters_for_status_quo(), { "HealthSystem": { - "yearly_HR_scaling_mode": "GDP_growth_fHE_case1", - "year_mode_switch": 2019, + "yearly_HR_scaling_mode": "historical_scaling", + "year_mode_switch": 2020, "mode_appt_constraints_postSwitch": 2, "scale_to_effective_capabilities": True, "policy_name": "Naive", @@ -117,8 +117,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: get_parameters_for_status_quo(), { "HealthSystem": { - "yearly_HR_scaling_mode": "GDP_growth_fHE_case3", - "year_mode_switch": 2019, + "yearly_HR_scaling_mode": "historical_scaling", + "year_mode_switch": 2020, "mode_appt_constraints_postSwitch": 2, "scale_to_effective_capabilities": True, "policy_name": "Naive", @@ -137,8 +137,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: get_parameters_for_status_quo(), { "HealthSystem": { - "yearly_HR_scaling_mode": "GDP_growth_FL_case2_const_tot_i", - "year_mode_switch": 2019, + "yearly_HR_scaling_mode": "historical_scaling", + "year_mode_switch": 2020, "mode_appt_constraints_postSwitch": 2, "scale_to_effective_capabilities": True, "policy_name": "Naive", From f586244c9b5221efa9c97e20ca92172377f3e764 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Fri, 21 Nov 2025 13:25:09 +0200 Subject: [PATCH 03/52] added default function --- .../nurses_scenario_analyses.py | 82 ++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 472726bc59..6353f0ff38 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -45,6 +45,86 @@ def log_configuration(self): def 
modules(self): return fullmodel(resourcefilepath=self.resources) + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + def _default_of_all_scenarios(self) -> Dict: + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'HealthSystem': { + 'mode_appt_constraints': 1, + 'mode_appt_constraints_postSwitch': 2, + "scale_to_effective_capabilities": True, + # This happens in the year before mode change, as the model calibration is done by that year + "year_mode_switch": 2020, + 'cons_availability': 'default', + 'cons_availability_postSwitch': "all", + # 'year_cons_availability_switch': 2025, + 'HR_budget_growth_rate': self.hr_budget[0], + 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 + 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION, + 'end_year_HR_expansion_by_officer_type': self.end_date.year, + "policy_name": 'Naive', + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + }, + 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { + 'max_healthcare_seeking': [False, False], + 'max_healthsystem_function': self.hs_function[0], + 'year_of_switch': self.YEAR_OF_HRH_EXPANSION, + } + }, + ) + + def _baseline_scenario(self) -> Dict: + return mix_scenarios( + self._default_of_all_scenarios(), + { + 'HealthSystem': { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "actual", + }, + }, + ) + + def _improved_staffing_scenario(self) -> Dict: + return mix_scenarios( + self._default_of_all_scenarios(), + { + 'HealthSystem': { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "funded_plus", + }, + }, + ) + + def _worst_case_scenario(self) -> Dict: + return mix_scenarios( + self._default_of_all_scenarios(), + { + 'HealthSystem': { + 
'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "actual", + }, + }, + ) + # To be sensitivity analysis + # def _baseline_scenario(self) -> Dict: + # return mix_scenarios( + # self._default_of_all_scenarios(), + # { + # 'HealthSystem': { + # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + # 'year_mode_switch': 2020, + # 'mode_appt_constraints_postSwitch': 2, + # 'scale_to_effective_capabilities': True, + # "use_funded_or_actual_staffing": "actual", + # "year_cons_availability_switch": 2025, + # "cons_availability_postSwitch": "all", + # }, + # }, + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] @@ -57,7 +137,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: return { "Baseline": mix_scenarios( - get_parameters_for_status_quo(), + self._default_of_all_scenarios(), { "HealthSystem": { "ResourceFile_HR_scaling_by_level_and_officer_type": "default", From 94a9b35069c449ea80a31705de779a44038bf9a9 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Fri, 21 Nov 2025 14:07:32 +0200 Subject: [PATCH 04/52] reformat --- .../nurses_scenario_analyses.py | 241 +++++++----------- 1 file changed, 95 insertions(+), 146 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 6353f0ff38..12ab9b3173 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -22,12 +22,14 @@ class StaffingScenario(BaseScenario): def __init__(self): super().__init__() - self.seed=12 - self.start_date=Date(2010, 1, 1) - self.end_date=Date(2030, 1, 1) - self.initial_population_size=200 - self.number_of_draws=2 - self.runs_per_draw=2 + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = Date(2030, 1, 1) + 
self.initial_population_size = 200 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.number_of_draws = 2 + self.runs_per_draw = 2 def log_configuration(self): return { @@ -43,7 +45,14 @@ def log_configuration(self): } def modules(self): - return fullmodel(resourcefilepath=self.resources) + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + return fullmodel(resourcefilepath=self.resources) + [ + ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return def _default_of_all_scenarios(self) -> Dict: return mix_scenarios( @@ -57,7 +66,7 @@ def _default_of_all_scenarios(self) -> Dict: "year_mode_switch": 2020, 'cons_availability': 'default', 'cons_availability_postSwitch': "all", - # 'year_cons_availability_switch': 2025, + # 'year_cons_availability_switch': 2025, 'HR_budget_growth_rate': self.hr_budget[0], 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION, @@ -74,41 +83,86 @@ def _default_of_all_scenarios(self) -> Dict: }, ) - def _baseline_scenario(self) -> Dict: - return mix_scenarios( - self._default_of_all_scenarios(), - { - 'HealthSystem': { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "actual", - }, - }, - ) + # def _baseline_scenario(self) -> Dict: + # return mix_scenarios( + # self._default_of_all_scenarios(), + # { + # 'HealthSystem': { + # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + # 'mode_appt_constraints_postSwitch': 2, + # "use_funded_or_actual_staffing": "actual", + # }, + # }, + # ) + # + # def _improved_staffing_scenario(self) -> Dict: 
+ # return mix_scenarios( + # self._default_of_all_scenarios(), + # { + # 'HealthSystem': { + # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + # 'mode_appt_constraints_postSwitch': 2, + # "use_funded_or_actual_staffing": "funded_plus", + # }, + # }, + # ) + # + # def _worst_case_scenario(self) -> Dict: + # return mix_scenarios( + # self._default_of_all_scenarios(), + # { + # 'HealthSystem': { + # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + # 'mode_appt_constraints_postSwitch': 2, + # "use_funded_or_actual_staffing": "actual", + # }, + # }, + # ) + #################################################################### + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + return { + "Baseline": + mix_scenarios( + self._default_of_all_scenarios(), + { + "HealthSystem": { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "actual", + }, + } + ), + + "Improved Staffing": + mix_scenarios( + self._default_of_all_scenarios(), + { + "HealthSystem": { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "funded_plus", + }, + } + ), + + "Worse Case": + mix_scenarios( + self._default_of_all_scenarios(), + { + "HealthSystem": { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "actual", + }, + } + ), + } + + - def _improved_staffing_scenario(self) -> Dict: - return mix_scenarios( - self._default_of_all_scenarios(), - { - 'HealthSystem': { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "funded_plus", - }, - }, - 
) - def _worst_case_scenario(self) -> Dict: - return mix_scenarios( - self._default_of_all_scenarios(), - { - 'HealthSystem': { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "actual", - }, - }, - ) # To be sensitivity analysis # def _baseline_scenario(self) -> Dict: # return mix_scenarios( @@ -125,112 +179,7 @@ def _worst_case_scenario(self) -> Dict: # }, # }, # ) - def draw_parameters(self, draw_number, rng): - if draw_number < self.number_of_draws: - return list(self._scenarios.values())[draw_number] - else: - return - def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - """ - return { - "Baseline": - mix_scenarios( - self._default_of_all_scenarios(), - { - "HealthSystem": { - "ResourceFile_HR_scaling_by_level_and_officer_type": "default", - "year_mode_switch": 2020, - "mode_appt_constraints_postSwitch": 2, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "year_cons_availability_switch": 2025, - "cons_availability_postSwitch": "all", - }, - } - ), - - "Improved Staffing": - mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "ResourceFile_HR_scaling_by_level_and_officer_type": "default", - "year_mode_switch": 2020, - "mode_appt_constraints_postSwitch": 2, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "funded_plus", - "year_cons_availability_switch": 2025, - "cons_availability_postSwitch": "all", - }, - } - ), - - "Worst-case Scenario": - mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "yearly_HR_scaling_mode": "historical_scaling", - "year_mode_switch": 2020, - 
"mode_appt_constraints_postSwitch": 2, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "year_cons_availability_switch": 2019, - "cons_availability_postSwitch": "all", - }, - } - ), - - "Demand Sensitivity": - mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "yearly_HR_scaling_mode": "historical_scaling", - "year_mode_switch": 2020, - "mode_appt_constraints_postSwitch": 2, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "year_cons_availability_switch": 2019, - "cons_availability_postSwitch": "all", - }, - } - ), - -#Look into doing sensitivity analyses in the model - "Time appointments Sensitivity": - mix_scenarios( - get_parameters_for_status_quo(), - { - "HealthSystem": { - "yearly_HR_scaling_mode": "historical_scaling", - "year_mode_switch": 2020, - "mode_appt_constraints_postSwitch": 2, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "year_cons_availability_switch": 2019, - "cons_availability_postSwitch": "all", - }, - } - ), - } if __name__ == '__main__': from tlo.cli import scenario_run From ec05b17d3ccbf98d07a29992c056452087ef3742 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Fri, 21 Nov 2025 14:15:20 +0200 Subject: [PATCH 05/52] remove unused input --- src/scripts/nurses_analyses/nurses_scenario_analyses.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 12ab9b3173..60705d2f60 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -10,8 +10,6 @@ from 
pathlib import Path from typing import Dict -import pandas as pd - from tlo import Date, logging from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios from tlo.methods.fullmodel import fullmodel From 7994ed826533ba205e986bf634fd84338c8a19c8 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Thu, 27 Nov 2025 10:29:22 +0200 Subject: [PATCH 06/52] added files for improved and worse case scenarios --- .../custom_doubling.csv | 3 +++ .../custom_worse.csv | 3 +++ .../nurses_scenario_analyses.py | 26 +++++++++++++------ 3 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv new file mode 100644 index 0000000000..a4e0539548 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8298c7b85ad5e3989c56478db11f4e1efe3f8560ef56bdfd6725835b47d37a6 +size 429 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv new file mode 100644 index 0000000000..c460433325 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:bd8fc7165267d760709fbcbf49dfd553646302f0b62c61326a304534745f016f +size 450 diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 60705d2f60..b2df8b9651 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -65,18 +65,15 @@ def _default_of_all_scenarios(self) -> Dict: 'cons_availability': 'default', 'cons_availability_postSwitch': "all", # 'year_cons_availability_switch': 2025, - 'HR_budget_growth_rate': self.hr_budget[0], 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 - 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION, - 'end_year_HR_expansion_by_officer_type': self.end_date.year, "policy_name": 'Naive', "tclose_overwrite": 1, "tclose_days_offset_overwrite": 7, }, 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { 'max_healthcare_seeking': [False, False], - 'max_healthsystem_function': self.hs_function[0], - 'year_of_switch': self.YEAR_OF_HRH_EXPANSION, + 'max_healthsystem_function': [False, False], + 'year_of_switch': 2025, } }, ) @@ -126,7 +123,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'ResourceFile_HR_scaling_by_level_and_officer_type': "default", 'mode_appt_constraints_postSwitch': 2, "use_funded_or_actual_staffing": "actual", }, @@ -138,19 +135,32 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'ResourceFile_HR_scaling_by_level_and_officer_type': "default", 'mode_appt_constraints_postSwitch': 2, "use_funded_or_actual_staffing": "funded_plus", }, } ), + "Improved Staffing Doubled Establishment": + mix_scenarios( + 
self._default_of_all_scenarios(), + { + "HealthSystem": { + 'ResourceFile_HR_scaling_by_level_and_officer_type': "custom_doubling", + 'mode_appt_constraints_postSwitch': 2, + "use_funded_or_actual_staffing": "funded_plus", + }, + } + ), + + "Worse Case": mix_scenarios( self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", + 'ResourceFile_HR_scaling_by_level_and_officer_type': "custom_worse", 'mode_appt_constraints_postSwitch': 2, "use_funded_or_actual_staffing": "actual", }, From 40406ef6468d8a2e8778f42a0edab6def6c591b5 Mon Sep 17 00:00:00 2001 From: PempheroM Date: Mon, 8 Dec 2025 11:15:41 +0200 Subject: [PATCH 07/52] addressing comments --- .../nurses_scenario_analyses.py | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index b2df8b9651..420ac1a51d 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -26,7 +26,6 @@ def __init__(self): self.initial_population_size = 200 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.number_of_draws = 2 self.runs_per_draw = 2 def log_configuration(self): @@ -60,6 +59,7 @@ def _default_of_all_scenarios(self) -> Dict: 'mode_appt_constraints': 1, 'mode_appt_constraints_postSwitch': 2, "scale_to_effective_capabilities": True, + "year_HR_scaling_by_level_and_officer_type": 2025, # This happens in the year before mode change, as the model calibration is done by that year "year_mode_switch": 2020, 'cons_availability': 'default', @@ -123,9 +123,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "default", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": 
"actual", + 'HR_scaling_by_level_and_officer_type_mode': "default", }, } ), @@ -135,9 +133,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "default", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "funded_plus", + 'HR_scaling_by_level_and_officer_type_mode': "default", }, } ), @@ -147,30 +143,22 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "custom_doubling", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "funded_plus", + 'HR_scaling_by_level_and_officer_type_mode': "custom_doubling", }, } ), - "Worse Case": mix_scenarios( self._default_of_all_scenarios(), { "HealthSystem": { - 'ResourceFile_HR_scaling_by_level_and_officer_type': "custom_worse", - 'mode_appt_constraints_postSwitch': 2, - "use_funded_or_actual_staffing": "actual", + 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", }, } ), } - - - # To be sensitivity analysis # def _baseline_scenario(self) -> Dict: # return mix_scenarios( From 6657544d2b99316652dfd306d7d7f1edfa7fb0cb Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 9 Dec 2025 15:59:56 +0200 Subject: [PATCH 08/52] . 
--- .../clinics/ResourceFile_ClinicConfigurations/Default.csv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv b/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv index 88c9a3cb73..871f162935 100644 --- a/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv +++ b/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv @@ -1 +1,3 @@ -Facility_ID,Officer_Type_Code,GenericClinic +version https://git-lfs.github.com/spec/v1 +oid sha256:cd312903ff50d5233d81075b1f38e7879b8933e3ad7067d52c696e4f37e51eac +size 44 From cc6028502a87ce2d0320c597c5b7206116a4bcac Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 3 Feb 2026 17:05:46 +0200 Subject: [PATCH 09/52] setup analyses for qs --- .../custom_doubling.csv | 3 - .../custom_worse.csv | 4 +- .../improved_staffing.csv | 3 + .../nurses_scenario_analyses.py | 196 +++++++++++------- 4 files changed, 131 insertions(+), 75 deletions(-) delete mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv deleted file mode 100644 index a4e0539548..0000000000 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_doubling.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:d8298c7b85ad5e3989c56478db11f4e1efe3f8560ef56bdfd6725835b47d37a6 -size 429 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv index c460433325..764f6bd018 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd8fc7165267d760709fbcbf49dfd553646302f0b62c61326a304534745f016f -size 450 +oid sha256:39c8f8e930be2330148a31442e95e041f7f45d6b9c623c635081c0c7b20ff1c1 +size 338 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv new file mode 100644 index 0000000000..800d70a992 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c676e4dde332f210831076cff711d73c280aed3638beba226f6418e1413dacb +size 317 diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 420ac1a51d..f869dc9f44 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -1,11 +1,15 @@ """ This scenario file sets up the scenarios for simulating the effects of nursing staffing levels -The scenario -0- Baseline scenario -1- -2- - - +The scenarios are: +0- Baseline 
+1- Baseline Perfect Healthcare Seeking +2- Baseline Perfect Clinical Practice +3- Improved Staffing +4- Improved Perfect Healthcare Seeking +5- Improved Perfect Clinical Practice +6- Worst Case +7- Worst Perfect Healthcare Seeking +8- Worst Perfect Healthcare Seeking """ from pathlib import Path from typing import Dict @@ -15,15 +19,18 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario +from tlo.analysis.utils import get_root_path + class StaffingScenario(BaseScenario): def __init__(self): super().__init__() + self.resources = get_root_path() / "resources" self.seed = 0 self.start_date = Date(2010, 1, 1) self.end_date = Date(2030, 1, 1) - self.initial_population_size = 200 + self.pop_size = 200 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) self.runs_per_draw = 2 @@ -42,8 +49,14 @@ def log_configuration(self): } def modules(self): - return fullmodel(resourcefilepath=self.resources) + [ - ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + return fullmodel() + [ + ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] + + # def modules(self): + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: @@ -78,42 +91,28 @@ def _default_of_all_scenarios(self) -> Dict: }, ) - # def _baseline_scenario(self) -> Dict: - # return mix_scenarios( - # self._default_of_all_scenarios(), - # { - # 'HealthSystem': { - # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - # 'mode_appt_constraints_postSwitch': 2, - # "use_funded_or_actual_staffing": "actual", - # }, - # }, - # ) - # - # def _improved_staffing_scenario(self) -> Dict: - # return mix_scenarios( - # 
self._default_of_all_scenarios(), - # { - # 'HealthSystem': { - # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - # 'mode_appt_constraints_postSwitch': 2, - # "use_funded_or_actual_staffing": "funded_plus", - # }, - # }, - # ) - # - # def _worst_case_scenario(self) -> Dict: - # return mix_scenarios( - # self._default_of_all_scenarios(), - # { - # 'HealthSystem': { - # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - # 'mode_appt_constraints_postSwitch': 2, - # "use_funded_or_actual_staffing": "actual", - # }, - # }, - # ) - #################################################################### + def _default_of_all_max_healthsystem_scenarios(self) -> Dict: + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'HealthSystem': { + 'mode_appt_constraints': 1, + 'mode_appt_constraints_postSwitch': 2, + "scale_to_effective_capabilities": True, + "year_HR_scaling_by_level_and_officer_type": 2025, + # This happens in the year before mode change, as the model calibration is done by that year + "year_mode_switch": 2020, + 'cons_availability': 'default', + 'cons_availability_postSwitch': "all", + # 'year_cons_availability_switch': 2025, + 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 + "policy_name": 'Naive', + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + }, + }, + ) + def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. 
""" @@ -128,27 +127,73 @@ def _get_scenarios(self) -> Dict[str, Dict]: } ), - "Improved Staffing": + "Baseline Perfect Healthcare Seeking": mix_scenarios( - self._default_of_all_scenarios(), - { - "HealthSystem": { + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "default", - }, + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [False] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, + } + ), + + "Baseline Perfect Clinical Practice": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "default", + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [True] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, } ), - "Improved Staffing Doubled Establishment": + "Improved Staffing": mix_scenarios( self._default_of_all_scenarios(), { "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "custom_doubling", + 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", }, } ), - "Worse Case": + "Improved Perfect Healthcare Seeking": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [False] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, + } + ), + + "Improved Perfect Clinical Practice": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [True] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, + } + ), + + "Worst Case": mix_scenarios( self._default_of_all_scenarios(), { @@ -157,24 +202,35 @@ def _get_scenarios(self) -> Dict[str, Dict]: }, } ), - } 
- # To be sensitivity analysis - # def _baseline_scenario(self) -> Dict: - # return mix_scenarios( - # self._default_of_all_scenarios(), - # { - # 'HealthSystem': { - # 'ResourceFile_HR_scaling_by_level_and_officer_type': "historical_scaling", - # 'year_mode_switch': 2020, - # 'mode_appt_constraints_postSwitch': 2, - # 'scale_to_effective_capabilities': True, - # "use_funded_or_actual_staffing": "actual", - # "year_cons_availability_switch": 2025, - # "cons_availability_postSwitch": "all", - # }, - # }, - # ) + "Worst Perfect Healthcare Seeking": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [False] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, + } + ), + + "Worst Perfect Clinical Practice": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios(), + {"HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + }}, + {'ScenarioSwitcher': { + 'max_healthsystem_function': [True] * 2, + 'max_healthcare_seeking': [True] * 2, + 'year_of_switch': 2025, + }, + } + ), + } if __name__ == '__main__': From 95f402001d9fca91a12cf4c9754ecf922d290cc7 Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 3 Feb 2026 17:08:43 +0200 Subject: [PATCH 10/52] isort --- .../nurses_analyses/nurses_scenario_analyses.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index f869dc9f44..164c041b5f 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -15,12 +15,10 @@ from typing import Dict from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.analysis.utils import 
get_parameters_for_status_quo, get_root_path, mix_scenarios from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario -from tlo.analysis.utils import get_root_path - class StaffingScenario(BaseScenario): @@ -52,12 +50,6 @@ def modules(self): return fullmodel() + [ ImprovedHealthSystemAndCareSeekingScenarioSwitcher()] - # def modules(self): - # return ( - # fullmodel(resourcefilepath=self.resources) - # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - # ) - def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] @@ -131,8 +123,8 @@ def _get_scenarios(self) -> Dict[str, Dict]: mix_scenarios( self._default_of_all_max_healthsystem_scenarios(), {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "default", - }}, + 'HR_scaling_by_level_and_officer_type_mode': "default", + }}, {'ScenarioSwitcher': { 'max_healthsystem_function': [False] * 2, 'max_healthcare_seeking': [True] * 2, From 6d2ba99b90ca133ceb12122b22ce3d9ca230d5ad Mon Sep 17 00:00:00 2001 From: thewati Date: Wed, 4 Feb 2026 09:00:43 +0200 Subject: [PATCH 11/52] change start and end years --- .../nurses_scenario_analyses.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 164c041b5f..f506c16dd9 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -27,7 +27,7 @@ def __init__(self): self.resources = get_root_path() / "resources" self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2030, 1, 1) + self.end_date = Date(2035, 1, 1) self.pop_size = 200 self._scenarios = self._get_scenarios() self.number_of_draws = 
len(self._scenarios) @@ -64,12 +64,12 @@ def _default_of_all_scenarios(self) -> Dict: 'mode_appt_constraints': 1, 'mode_appt_constraints_postSwitch': 2, "scale_to_effective_capabilities": True, - "year_HR_scaling_by_level_and_officer_type": 2025, + "year_HR_scaling_by_level_and_officer_type": 2027, # This happens in the year before mode change, as the model calibration is done by that year "year_mode_switch": 2020, 'cons_availability': 'default', 'cons_availability_postSwitch': "all", - # 'year_cons_availability_switch': 2025, + # 'year_cons_availability_switch': 2027, 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 "policy_name": 'Naive', "tclose_overwrite": 1, @@ -78,7 +78,7 @@ def _default_of_all_scenarios(self) -> Dict: 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { 'max_healthcare_seeking': [False, False], 'max_healthsystem_function': [False, False], - 'year_of_switch': 2025, + 'year_of_switch': 2027, } }, ) @@ -91,12 +91,12 @@ def _default_of_all_max_healthsystem_scenarios(self) -> Dict: 'mode_appt_constraints': 1, 'mode_appt_constraints_postSwitch': 2, "scale_to_effective_capabilities": True, - "year_HR_scaling_by_level_and_officer_type": 2025, + "year_HR_scaling_by_level_and_officer_type": 2027, # This happens in the year before mode change, as the model calibration is done by that year "year_mode_switch": 2020, 'cons_availability': 'default', 'cons_availability_postSwitch': "all", - # 'year_cons_availability_switch': 2025, + # 'year_cons_availability_switch': 2027, 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 "policy_name": 'Naive', "tclose_overwrite": 1, @@ -128,7 +128,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: {'ScenarioSwitcher': { 'max_healthsystem_function': [False] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), @@ -142,7 +142,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: 
{'ScenarioSwitcher': { 'max_healthsystem_function': [True] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), @@ -166,7 +166,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: {'ScenarioSwitcher': { 'max_healthsystem_function': [False] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), @@ -180,7 +180,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: {'ScenarioSwitcher': { 'max_healthsystem_function': [True] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), @@ -204,7 +204,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: {'ScenarioSwitcher': { 'max_healthsystem_function': [False] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), @@ -218,7 +218,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: {'ScenarioSwitcher': { 'max_healthsystem_function': [True] * 2, 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2025, + 'year_of_switch': 2027, }, } ), From d5def29708cc25dd459f54cb510f9316cd01f8c5 Mon Sep 17 00:00:00 2001 From: Tim Hallett <39991060+tbhallett@users.noreply.github.com> Date: Fri, 6 Feb 2026 10:22:17 +0000 Subject: [PATCH 12/52] TH suggestions --- .../nurses_scenario_analyses.py | 197 ++++++------------ 1 file changed, 65 insertions(+), 132 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index f506c16dd9..f07117ea8d 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -1,21 +1,26 @@ """ -This scenario file sets up the scenarios for simulating the effects of nursing staffing levels -The scenarios are: -0- Baseline -1- Baseline Perfect Healthcare Seeking -2- Baseline Perfect Clinical Practice -3- Improved Staffing -4- Improved Perfect Healthcare Seeking -5- Improved Perfect 
Clinical Practice -6- Worst Case -7- Worst Perfect Healthcare Seeking -8- Worst Perfect Healthcare Seeking +This scenario file sets up the scenarios for simulating the effects of nursing staffing levels. + +Run on the batch system using: +``` +tlo batch-submit src/scripts/nurses_analyses/nurses_scenario_analyses.py +``` + +or locally using: +``` +tlo scenario-run src/scripts/nurses_analyses/nurses_scenario_analyses.py + ``` + + + """ + from pathlib import Path from typing import Dict from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, get_root_path, mix_scenarios +from tlo.analysis.utils import get_parameters_for_status_quo, get_root_path, mix_scenarios, \ + get_parameters_for_hrh_historical_scaling_and_rescaling_for_mode2 from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario @@ -53,54 +58,30 @@ def modules(self): def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] - else: - return + def draw_name(self, draw_number) -> str: + """Store scenario name. + (This name can be retrieved by the plotting scripts to make the graphs be labelled nicely). 
+ """ + if draw_number < self.number_of_draws: + return list(self._scenarios.keys())[draw_number] + + @property def _default_of_all_scenarios(self) -> Dict: - return mix_scenarios( - get_parameters_for_status_quo(), - { - 'HealthSystem': { - 'mode_appt_constraints': 1, - 'mode_appt_constraints_postSwitch': 2, - "scale_to_effective_capabilities": True, - "year_HR_scaling_by_level_and_officer_type": 2027, - # This happens in the year before mode change, as the model calibration is done by that year - "year_mode_switch": 2020, - 'cons_availability': 'default', - 'cons_availability_postSwitch': "all", - # 'year_cons_availability_switch': 2027, - 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 - "policy_name": 'Naive', - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - }, - 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { - 'max_healthcare_seeking': [False, False], - 'max_healthsystem_function': [False, False], - 'year_of_switch': 2027, - } - }, - ) + """Base set of parameters is the standard historical scaling and transition into Mode 2.""" + return get_parameters_for_hrh_historical_scaling_and_rescaling_for_mode2() + @property def _default_of_all_max_healthsystem_scenarios(self) -> Dict: + """Improved Health System Performance: the same as the default for scenarios, but increases health system + function and healthcare seeking behaviour in 2027""" return mix_scenarios( - get_parameters_for_status_quo(), + self._default_of_all_scenarios, # <-- start with the same default set of parameters (to avoid repeating them) { - 'HealthSystem': { - 'mode_appt_constraints': 1, - 'mode_appt_constraints_postSwitch': 2, - "scale_to_effective_capabilities": True, - "year_HR_scaling_by_level_and_officer_type": 2027, - # This happens in the year before mode change, as the model calibration is done by that year - "year_mode_switch": 2020, - 'cons_availability': 'default', - 'cons_availability_postSwitch': "all", - # 
'year_cons_availability_switch': 2027, - 'yearly_HR_scaling_mode': 'historical_scaling', # for 5 years of 2020-2024; source data year 2019 - "policy_name": 'Naive', - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, + 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { + 'max_healthcare_seeking': [False, True], + 'max_healthsystem_function': [False, True], + 'year_of_switch': 2027, }, }, ) @@ -109,118 +90,70 @@ def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. """ return { - "Baseline": + "Baseline Nurses / Default Healthsystem Function": mix_scenarios( - self._default_of_all_scenarios(), + self._default_of_all_scenarios, { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "default", + "year_HR_scaling_by_level_and_officer_type": 2027, }, - } - ), - - "Baseline Perfect Healthcare Seeking": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "default", - }}, - {'ScenarioSwitcher': { - 'max_healthsystem_function': [False] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, }, - } ), - "Baseline Perfect Clinical Practice": + "Fewer Nurses / Default Healthsystem Function": mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "default", - }}, - {'ScenarioSwitcher': { - 'max_healthsystem_function': [True] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, + self._default_of_all_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + "year_HR_scaling_by_level_and_officer_type": 2027, + }, }, - } ), - "Improved Staffing": + "More Nurses / Default Healthsystem Function": mix_scenarios( - self._default_of_all_scenarios(), + self._default_of_all_scenarios, { "HealthSystem": { 
'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + "year_HR_scaling_by_level_and_officer_type": 2027, }, - } - ), - - "Improved Perfect Healthcare Seeking": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", - }}, - {'ScenarioSwitcher': { - 'max_healthsystem_function': [False] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, }, - } ), - "Improved Perfect Clinical Practice": + "Baseline Nurses / Improved Healthsystem Function": mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", - }}, - {'ScenarioSwitcher': { - 'max_healthsystem_function': [True] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, + self._default_of_all_max_healthsystem_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "default", + "year_HR_scaling_by_level_and_officer_type": 2027, + }, }, - } ), - "Worst Case": + "Fewer Nurses / Improved Healthsystem Function": mix_scenarios( - self._default_of_all_scenarios(), + self._default_of_all_max_healthsystem_scenarios, { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + "year_HR_scaling_by_level_and_officer_type": 2027, }, - } - ), - - "Worst Perfect Healthcare Seeking": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", - }}, - {'ScenarioSwitcher': { - 'max_healthsystem_function': [False] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, }, - } ), - "Worst Perfect Clinical Practice": + "More Nurses / Improved Healthsystem Function": mix_scenarios( - self._default_of_all_max_healthsystem_scenarios(), - {"HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", - }}, - 
{'ScenarioSwitcher': { - 'max_healthsystem_function': [True] * 2, - 'max_healthcare_seeking': [True] * 2, - 'year_of_switch': 2027, + self._default_of_all_max_healthsystem_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + "year_HR_scaling_by_level_and_officer_type": 2027, + }, }, - } ), } From beadfee4ccad7559a1fb5ff2797f4a4d1efc2fcf Mon Sep 17 00:00:00 2001 From: Tim Hallett <39991060+tbhallett@users.noreply.github.com> Date: Fri, 6 Feb 2026 10:29:58 +0000 Subject: [PATCH 13/52] linting! --- src/scripts/nurses_analyses/nurses_scenario_analyses.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index f07117ea8d..3da5335ed9 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -19,8 +19,11 @@ from typing import Dict from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, get_root_path, mix_scenarios, \ - get_parameters_for_hrh_historical_scaling_and_rescaling_for_mode2 +from tlo.analysis.utils import ( + get_parameters_for_hrh_historical_scaling_and_rescaling_for_mode2, + get_root_path, + mix_scenarios, +) from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario From 481e2bf4c34db091683700852a1d2c9b7b382b84 Mon Sep 17 00:00:00 2001 From: thewati Date: Mon, 16 Feb 2026 10:06:32 +0200 Subject: [PATCH 14/52] plots of draws --- .../analysis_nurses_scenario.py | 207 ++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 src/scripts/nurses_analyses/analysis_nurses_scenario.py diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario.py b/src/scripts/nurses_analyses/analysis_nurses_scenario.py new file mode 100644 index 
0000000000..6d22431431 --- /dev/null +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario.py @@ -0,0 +1,207 @@ +"""This file uses the results of the results of running `nurse_analyses/nurses_scenario_analyses.py` to make some summary + graphs.""" + +import argparse +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from tlo.analysis.utils import ( + extract_results, + get_scenario_info, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + summarize, +) + + +def extract_total_deaths(results_folder): + def extract_deaths_total(df: pd.DataFrame) -> pd.Series: + return pd.Series({"Total": len(df)}) + + return extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=extract_deaths_total, + do_scaling=True + ) + + +# def plot_summarized_total_deaths(summarized_total_deaths, param_strings): +# fig, ax = plt.subplots() +# # number_of_draws = len(param_strings) +# number_of_draws = len( +# summarized_total_deaths.columns.get_level_values(0).unique() +# ) +# +# statistic_values = { +# s: np.array( +# [summarized_total_deaths[(d, s)].values[0] for d in range(number_of_draws)] +# ) +# for s in ["mean", "lower", "upper"] +# } +# ax.bar( +# param_strings, +# statistic_values["mean"], +# yerr=[ +# statistic_values["mean"] - statistic_values["lower"], +# statistic_values["upper"] - statistic_values["mean"] +# ] +# ) +# ax.set_ylabel("Total number of deaths") +# fig.tight_layout() +# return fig, ax + +def plot_summarized_total_deaths(summarized_total_deaths): + fig, ax = plt.subplots() + + # Get actual draw IDs from the dataframe + draw_ids = summarized_total_deaths.columns.get_level_values(0).unique() + + means = np.array([ + summarized_total_deaths[(d, "mean")].values[0] for d in draw_ids + ]) + lowers = np.array([ + summarized_total_deaths[(d, "lower")].values[0] for d in draw_ids + ]) + uppers = np.array([ + summarized_total_deaths[(d, 
"upper")].values[0] for d in draw_ids + ]) + + ax.bar( + draw_ids, + means, + yerr=[means - lowers, uppers - means] + ) + + ax.set_ylabel("Total number of deaths") + ax.set_xlabel("Scenario draw") + fig.tight_layout() + return fig, ax + + +def compute_difference_in_deaths_across_runs(total_deaths, scenario_info): + deaths_difference_by_run = [ + total_deaths[0][run_number]["Total"] - total_deaths[1][run_number]["Total"] + for run_number in range(scenario_info["runs_per_draw"]) + ] + return np.mean(deaths_difference_by_run) + + +def extract_deaths_by_age(results_folder): + def extract_deaths_by_age_group(df: pd.DataFrame) -> pd.Series: + _, age_group_lookup = make_age_grp_lookup() + df["Age_Grp"] = df["age"].map(age_group_lookup).astype(make_age_grp_types()) + df = df.rename(columns={"sex": "Sex"}) + return df.groupby(["Age_Grp"])["person_id"].count() + + return extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=extract_deaths_by_age_group, + do_scaling=True + ) + + +def plot_summarized_deaths_by_age(deaths_summarized_by_age, param_strings): + fig, ax = plt.subplots() + for i, param in enumerate(param_strings): + central_values = deaths_summarized_by_age[(i, "mean")].values + lower_values = deaths_summarized_by_age[(i, "lower")].values + upper_values = deaths_summarized_by_age[(i, "upper")].values + ax.plot( + deaths_summarized_by_age.index, central_values, + color=f"C{i}", + label=param + ) + ax.fill_between( + deaths_summarized_by_age.index, lower_values, upper_values, + alpha=0.5, + color=f"C{i}", + label="_" + ) + ax.set(xlabel="Age-Group", ylabel="Total deaths") + ax.set_xticks(deaths_summarized_by_age.index) + ax.set_xticklabels(labels=deaths_summarized_by_age.index, rotation=90) + ax.legend() + fig.tight_layout() + return fig, ax + + +if __name__ == "__main__": + # Parse command line arguments + parser = argparse.ArgumentParser( + "Analyse scenario results for testing nurses scenario" + ) + 
parser.add_argument( + "--scenario-outputs-folder", + type=Path, + required=True, + help="Path to folder containing scenario outputs", + ) + parser.add_argument( + "--show-figures", + action="store_true", + help="Whether to interactively show generated Matplotlib figures", + ) + parser.add_argument( + "--save-figures", + action="store_true", + help="Whether to save generated Matplotlib figures to results folder", + ) + args = parser.parse_args() + + # Find results_folder associated with a given batch_file and get most recent + # results_folder = get_scenario_outputs( + # "scenario_impact_of_consumables_availability.py", args.scenario_outputs_folder + # )[-1] + + results_folder = Path( + './outputs/wamulwafu@kuhes.ac.mw/nurses_scenario_outputs-2026-02-09T110530Z' + ) + + # Load log (useful for checking what can be extracted) + log = load_pickled_dataframes(results_folder) + + # Get basic information about the results + scenario_info = get_scenario_info(results_folder) + + # # Get the parameters that have varied over the set of simulations + # params = extract_params(results_folder) + # + # # Create a list of strings summarizing the parameter values in the different draws + # param_strings = [f"{row.module_param}={row.value}" for _, row in params.iterrows()] + + number_of_draws = scenario_info["number_of_draws"] + param_strings = [f"Draw {i}" for i in range(number_of_draws)] + + # We first look at total deaths in the scenario runs + total_deaths = extract_total_deaths(results_folder) + + # Compute and print the difference between the deaths across the scenario draws + # mean_deaths_difference_by_run = compute_difference_in_deaths_across_runs( + # total_deaths, scenario_info + # ) + # print(f"Mean difference in total deaths = {mean_deaths_difference_by_run:.3g}") + + # Plot the total deaths across the six scenario draws + # fig_1, ax_1 = plot_summarized_total_deaths(summarize(total_deaths), param_strings) + fig_1, ax_1 = 
plot_summarized_total_deaths(summarize(total_deaths)) + + # Age breakdown + deaths_by_age = extract_deaths_by_age(results_folder) + + # Plot the deaths by age across the six scenario draws as a line plot + fig_2, ax_2 = plot_summarized_deaths_by_age(summarize(deaths_by_age), param_strings) + + if args.show_figures: + plt.show() + + if args.save_figures: + fig_1.savefig(results_folder / "total_deaths_across_scenario_draws.pdf") + fig_2.savefig(results_folder / "death_by_age_across_scenario_draws.pdf") From 417503d82026c70042a4c57f30d87bc49b6a326b Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 20 Feb 2026 09:30:53 +0200 Subject: [PATCH 15/52] from LFS to Git --- .../custom_worse.csv | 13 ++++++++++--- .../improved_staffing.csv | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv index 764f6bd018..03b651c251 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv @@ -1,3 +1,10 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39c8f8e930be2330148a31442e95e041f7f45d6b9c623c635081c0c7b20ff1c1 -size 338 +Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor +Clinical,1,1,1,1,1,1,1 +DCSA,1,1,1,1,1,1,1 +Dental,1,1,1,1,1,1,1 +Laboratory,1,1,1,1,1,1,1 +Mental,1,1,1,1,1,1,1 +Nursing_and_Midwifery,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2 +Nutrition,1,1,1,1,1,1,1 +Pharmacy,1,1,1,1,1,1,1 +Radiography,1,1,1,1,1,1,1 diff --git 
a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv index 800d70a992..041cedcd6e 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv @@ -1,3 +1,10 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c676e4dde332f210831076cff711d73c280aed3638beba226f6418e1413dacb -size 317 +Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor +Clinical,1,1,1,1,1,1,1 +DCSA,1,1,1,1,1,1,1 +Dental,1,1,1,1,1,1,1 +Laboratory,1,1,1,1,1,1,1 +Mental,1,1,1,1,1,1,1 +Nursing_and_Midwifery,2,2,2,2,2,2,2 +Nutrition,1,1,1,1,1,1,1 +Pharmacy,1,1,1,1,1,1,1 +Radiography,1,1,1,1,1,1,1 From e7c25682d342949356a1016b8e1729ba7876ef21 Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 20 Feb 2026 11:49:21 +0200 Subject: [PATCH 16/52] Label plots with names and not numbers --- .../analysis_nurses_scenario.py | 148 ++++++++---------- 1 file changed, 69 insertions(+), 79 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario.py b/src/scripts/nurses_analyses/analysis_nurses_scenario.py index 6d22431431..9cf9f71a60 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +from scripts.nurses_analyses.nurses_scenario_analyses import StaffingScenario from tlo.analysis.utils import ( extract_results, get_scenario_info, @@ -18,6 +19,18 @@ ) +# Rename draw numbers to scenario names +def set_param_names_as_column_index_level_0(_df, param_names): + """Set column index level 0 (draw 
numbers) to scenario names.""" + ordered_param_names = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ + ordered_param_names.get(col) + for col in _df.columns.levels[0] + ] + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def extract_total_deaths(results_folder): def extract_deaths_total(df: pd.DataFrame) -> pd.Series: return pd.Series({"Total": len(df)}) @@ -31,56 +44,35 @@ def extract_deaths_total(df: pd.DataFrame) -> pd.Series: ) -# def plot_summarized_total_deaths(summarized_total_deaths, param_strings): -# fig, ax = plt.subplots() -# # number_of_draws = len(param_strings) -# number_of_draws = len( -# summarized_total_deaths.columns.get_level_values(0).unique() -# ) -# -# statistic_values = { -# s: np.array( -# [summarized_total_deaths[(d, s)].values[0] for d in range(number_of_draws)] -# ) -# for s in ["mean", "lower", "upper"] -# } -# ax.bar( -# param_strings, -# statistic_values["mean"], -# yerr=[ -# statistic_values["mean"] - statistic_values["lower"], -# statistic_values["upper"] - statistic_values["mean"] -# ] -# ) -# ax.set_ylabel("Total number of deaths") -# fig.tight_layout() -# return fig, ax - def plot_summarized_total_deaths(summarized_total_deaths): fig, ax = plt.subplots() - # Get actual draw IDs from the dataframe - draw_ids = summarized_total_deaths.columns.get_level_values(0).unique() + scenario_names = summarized_total_deaths.columns.get_level_values(0).unique() means = np.array([ - summarized_total_deaths[(d, "mean")].values[0] for d in draw_ids + summarized_total_deaths[(s, "mean")].values[0] + for s in scenario_names ]) lowers = np.array([ - summarized_total_deaths[(d, "lower")].values[0] for d in draw_ids + summarized_total_deaths[(s, "lower")].values[0] + for s in scenario_names ]) uppers = np.array([ - summarized_total_deaths[(d, "upper")].values[0] for d in draw_ids + summarized_total_deaths[(s, "upper")].values[0] + for s in scenario_names ]) ax.bar( - draw_ids, + 
scenario_names, means, - yerr=[means - lowers, uppers - means] + yerr=[means - lowers, uppers - means], + capsize=5 ) ax.set_ylabel("Total number of deaths") - ax.set_xlabel("Scenario draw") + ax.set_xticklabels(scenario_names, rotation=45, ha="right") fig.tight_layout() + return fig, ax @@ -108,35 +100,41 @@ def extract_deaths_by_age_group(df: pd.DataFrame) -> pd.Series: ) -def plot_summarized_deaths_by_age(deaths_summarized_by_age, param_strings): +def plot_summarized_deaths_by_age(deaths_summarized_by_age): fig, ax = plt.subplots() - for i, param in enumerate(param_strings): - central_values = deaths_summarized_by_age[(i, "mean")].values - lower_values = deaths_summarized_by_age[(i, "lower")].values - upper_values = deaths_summarized_by_age[(i, "upper")].values + + scenario_names = deaths_summarized_by_age.columns.get_level_values(0).unique() + + for i, scenario in enumerate(scenario_names): + central_values = deaths_summarized_by_age[(scenario, "mean")].values + lower_values = deaths_summarized_by_age[(scenario, "lower")].values + upper_values = deaths_summarized_by_age[(scenario, "upper")].values + ax.plot( - deaths_summarized_by_age.index, central_values, - color=f"C{i}", - label=param + deaths_summarized_by_age.index, + central_values, + label=scenario ) + ax.fill_between( - deaths_summarized_by_age.index, lower_values, upper_values, - alpha=0.5, - color=f"C{i}", - label="_" + deaths_summarized_by_age.index, + lower_values, + upper_values, + alpha=0.3 ) + ax.set(xlabel="Age-Group", ylabel="Total deaths") ax.set_xticks(deaths_summarized_by_age.index) - ax.set_xticklabels(labels=deaths_summarized_by_age.index, rotation=90) + ax.set_xticklabels(deaths_summarized_by_age.index, rotation=90) ax.legend() fig.tight_layout() return fig, ax if __name__ == "__main__": - # Parse command line arguments + parser = argparse.ArgumentParser( - "Analyse scenario results for testing nurses scenario" + "Analyse scenario results for nurses scenario" ) parser.add_argument( 
"--scenario-outputs-folder", @@ -147,61 +145,53 @@ def plot_summarized_deaths_by_age(deaths_summarized_by_age, param_strings): parser.add_argument( "--show-figures", action="store_true", - help="Whether to interactively show generated Matplotlib figures", + help="Whether to interactively show figures", ) parser.add_argument( "--save-figures", action="store_true", - help="Whether to save generated Matplotlib figures to results folder", + help="Whether to save figures to results folder", ) args = parser.parse_args() - # Find results_folder associated with a given batch_file and get most recent - # results_folder = get_scenario_outputs( - # "scenario_impact_of_consumables_availability.py", args.scenario_outputs_folder - # )[-1] + # results_folder = args.scenario_outputs_folder results_folder = Path( './outputs/wamulwafu@kuhes.ac.mw/nurses_scenario_outputs-2026-02-09T110530Z' ) - # Load log (useful for checking what can be extracted) + # Load log (optional, but useful) log = load_pickled_dataframes(results_folder) - # Get basic information about the results scenario_info = get_scenario_info(results_folder) - # # Get the parameters that have varied over the set of simulations - # params = extract_params(results_folder) - # - # # Create a list of strings summarizing the parameter values in the different draws - # param_strings = [f"{row.module_param}={row.value}" for _, row in params.iterrows()] + # Get scenario names directly from Scenario class - number_of_draws = scenario_info["number_of_draws"] - param_strings = [f"Draw {i}" for i in range(number_of_draws)] + param_names = tuple(StaffingScenario()._scenarios.keys()) - # We first look at total deaths in the scenario runs - total_deaths = extract_total_deaths(results_folder) + # Total deaths + total_deaths = extract_total_deaths(results_folder).pipe( + set_param_names_as_column_index_level_0, + param_names=param_names + ) + + summarized_total_deaths = summarize(total_deaths) - # Compute and print the difference between 
the deaths across the scenario draws - # mean_deaths_difference_by_run = compute_difference_in_deaths_across_runs( - # total_deaths, scenario_info - # ) - # print(f"Mean difference in total deaths = {mean_deaths_difference_by_run:.3g}") + fig_1, ax_1 = plot_summarized_total_deaths(summarized_total_deaths) - # Plot the total deaths across the six scenario draws - # fig_1, ax_1 = plot_summarized_total_deaths(summarize(total_deaths), param_strings) - fig_1, ax_1 = plot_summarized_total_deaths(summarize(total_deaths)) + # Deaths by age + deaths_by_age = extract_deaths_by_age(results_folder).pipe( + set_param_names_as_column_index_level_0, + param_names=param_names + ) - # Age breakdown - deaths_by_age = extract_deaths_by_age(results_folder) + summarized_deaths_by_age = summarize(deaths_by_age) - # Plot the deaths by age across the six scenario draws as a line plot - fig_2, ax_2 = plot_summarized_deaths_by_age(summarize(deaths_by_age), param_strings) + fig_2, ax_2 = plot_summarized_deaths_by_age(summarized_deaths_by_age) if args.show_figures: plt.show() if args.save_figures: - fig_1.savefig(results_folder / "total_deaths_across_scenario_draws.pdf") - fig_2.savefig(results_folder / "death_by_age_across_scenario_draws.pdf") + fig_1.savefig(results_folder / "total_deaths_across_scenarios.pdf") + fig_2.savefig(results_folder / "deaths_by_age_across_scenarios.pdf") From 0b139b281335466b4608bdfb5248708b8744ca99 Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 24 Feb 2026 09:28:47 +0200 Subject: [PATCH 17/52] detailed plots --- .../analysis_nurses_scenario_detailed.py | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py new file mode 100644 index 0000000000..162c39061f --- /dev/null +++ 
b/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py @@ -0,0 +1,200 @@ +""" +Analysis of nurse expansion scenarios. + +Produces: +1. Yearly nurse cadre counts (2010–2034) +2. Minutes used per cadre (focus on nurses) +3. Appointments delivered per year +4. Working time used per cadre (focus on nurses) +""" + +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt + +from tlo.analysis.utils import ( + extract_results, + compute_mean_across_runs, +) + +START_YEAR = 2010 +END_YEAR = 2034 + + +# ============================================================================= +# COPIED + ADAPTED FUNCTIONS FROM analysis_hsi_descriptions.py +# ============================================================================= + +def get_frac_of_hcw_time_used(df): + """ + Returns minutes of time used per cadre. + Extracts the time used dictionary stored in HSI_Event output. + """ + + if "Time_Used_By_Cadre" not in df.columns: + return pd.DataFrame() + + # Expand dict column into columns + expanded = df["Time_Used_By_Cadre"].apply(pd.Series) + + expanded["date"] = df["date"] + + return expanded + + +def hcw_time_or_cost_used(df, return_time=True): + """ + Returns total minutes used per cadre (if return_time=True) + Otherwise returns cost. 
+ """ + + column = "Time_Used_By_Cadre" if return_time else "Cost_By_Cadre" + + if column not in df.columns: + return pd.DataFrame() + + expanded = df[column].apply(pd.Series) + expanded["date"] = df["date"] + + return expanded + + +# ============================================================================= +# MAIN ANALYSIS +# ============================================================================= + +def apply(results_folder: Path, output_folder: Path): + + output_folder.mkdir(exist_ok=True, parents=True) + + # --------------------------------------------------------------------- + # 1️⃣ CADRE COUNTS + # --------------------------------------------------------------------- + + cadre_counts = extract_results( + results_folder, + module="HealthSystem", + key="Current_Number_Of_Health_Workers_By_Cadre", + ) + + cadre_counts = compute_mean_across_runs(cadre_counts) + + cadre_counts["year"] = pd.to_datetime(cadre_counts["date"]).dt.year + cadre_counts = cadre_counts[cadre_counts["year"].between(START_YEAR, END_YEAR)] + + yearly_counts = cadre_counts.groupby("year").mean() + + nurse_cols = [c for c in yearly_counts.columns if "Nurse" in c or "Midwife" in c] + + plt.figure() + for col in nurse_cols: + plt.plot(yearly_counts.index, yearly_counts[col], label=col) + + plt.title("Yearly Nurse Cadre Counts (2010–2034)") + plt.xlabel("Year") + plt.ylabel("Number of Staff") + plt.legend() + plt.tight_layout() + plt.savefig(output_folder / "yearly_nurse_cadre_counts.png") + plt.close() + + # --------------------------------------------------------------------- + # 2️⃣ MINUTES USED PER CADRE + # --------------------------------------------------------------------- + + minutes_used = extract_results( + results_folder, + module="HealthSystem", + key="HSI_Event", + custom_generate_series=get_frac_of_hcw_time_used + ) + + minutes_used = compute_mean_across_runs(minutes_used) + + minutes_used["year"] = pd.to_datetime(minutes_used["date"]).dt.year + minutes_used = 
minutes_used[minutes_used["year"].between(START_YEAR, END_YEAR)] + + yearly_minutes = minutes_used.groupby("year").sum() + + nurse_minutes_cols = [c for c in yearly_minutes.columns if "Nurse" in c] + + plt.figure() + for col in nurse_minutes_cols: + plt.plot(yearly_minutes.index, yearly_minutes[col], label=col) + + plt.title("Yearly Minutes Used by Nurse Cadres") + plt.xlabel("Year") + plt.ylabel("Minutes Used") + plt.legend() + plt.tight_layout() + plt.savefig(output_folder / "yearly_nurse_minutes_used.png") + plt.close() + + # --------------------------------------------------------------------- + # 3️⃣ APPOINTMENTS DELIVERED + # --------------------------------------------------------------------- + + def extract_appointments(df): + if "Number_By_Appt_Type_Code" not in df.columns: + return pd.DataFrame() + + expanded = df["Number_By_Appt_Type_Code"].apply(pd.Series) + expanded["date"] = df["date"] + return expanded + + appts = extract_results( + results_folder, + module="HealthSystem", + key="HSI_Event", + custom_generate_series=extract_appointments + ) + + appts = compute_mean_across_runs(appts) + + appts["year"] = pd.to_datetime(appts["date"]).dt.year + appts = appts[appts["year"].between(START_YEAR, END_YEAR)] + + yearly_appts = appts.groupby("year").sum() + + plt.figure() + plt.plot(yearly_appts.index, yearly_appts.sum(axis=1)) + plt.title("Total Appointments Delivered per Year") + plt.xlabel("Year") + plt.ylabel("Number of Appointments") + plt.tight_layout() + plt.savefig(output_folder / "yearly_total_appointments.png") + plt.close() + + # --------------------------------------------------------------------- + # 4️⃣ WORKING TIME USED PER CADRE + # --------------------------------------------------------------------- + + working_time = extract_results( + results_folder, + module="HealthSystem", + key="HSI_Event", + custom_generate_series=lambda df: hcw_time_or_cost_used(df, return_time=True) + ) + + working_time = compute_mean_across_runs(working_time) + + 
working_time["year"] = pd.to_datetime(working_time["date"]).dt.year + working_time = working_time[working_time["year"].between(START_YEAR, END_YEAR)] + + yearly_working_time = working_time.groupby("year").sum() + + nurse_time_cols = [c for c in yearly_working_time.columns if "Nurse" in c] + + plt.figure() + for col in nurse_time_cols: + plt.plot(yearly_working_time.index, yearly_working_time[col], label=col) + + plt.title("Working Time Used by Nurse Cadres") + plt.xlabel("Year") + plt.ylabel("Minutes") + plt.legend() + plt.tight_layout() + plt.savefig(output_folder / "yearly_nurse_working_time.png") + plt.close() + + print("All nurse scenario plots generated successfully.") From ea79f95ec6210654ca93ebec7d640283f34608d7 Mon Sep 17 00:00:00 2001 From: thewati Date: Wed, 25 Feb 2026 14:43:14 +0200 Subject: [PATCH 18/52] plots for nurse cadre counts and appointments over time --- .../analysis_nurses_scenario_detailed.py | 636 +++++++++++++----- 1 file changed, 484 insertions(+), 152 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py index 162c39061f..ae638f8a4a 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario_detailed.py @@ -1,200 +1,532 @@ -""" -Analysis of nurse expansion scenarios. - -Produces: -1. Yearly nurse cadre counts (2010–2034) -2. Minutes used per cadre (focus on nurses) -3. Appointments delivered per year -4. 
Working time used per cadre (focus on nurses) -""" +"""This file uses the results of the results of running `nurse_analyses/nurses_scenario_analyses.py` to make plots of +nurse counts over time and appointments over time for each scenario/draw name from 2010 to 2034.""" +import argparse from pathlib import Path -import pandas as pd +from typing import Tuple, Dict +import pickle + import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from scripts.nurses_analyses.nurses_scenario_analyses import StaffingScenario +from tlo import Date from tlo.analysis.utils import ( - extract_results, - compute_mean_across_runs, + load_pickled_dataframes, + summarize, ) -START_YEAR = 2010 -END_YEAR = 2034 +# Rename draw numbers to scenario names +def set_param_names_as_column_index_level_0(_df, param_names): + """Set column index level 0 (draw numbers) to scenario names.""" + ordered_param_names = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ + ordered_param_names.get(col) + for col in _df.columns.levels[0] + ] + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df -# ============================================================================= -# COPIED + ADAPTED FUNCTIONS FROM analysis_hsi_descriptions.py -# ============================================================================= -def get_frac_of_hcw_time_used(df): +def load_data_manually(results_folder: Path) -> Dict: """ - Returns minutes of time used per cadre. - Extracts the time used dictionary stored in HSI_Event output. + Manually load data from the folder structure we observed. 
+ Folder structure: draw_folder/run_folder/pickle_files """ + data_by_draw = {} - if "Time_Used_By_Cadre" not in df.columns: - return pd.DataFrame() + # Find all draw folders (0, 1, 2, 3, 4, 5) + draw_folders = [d for d in results_folder.iterdir() if d.is_dir() and d.name.isdigit()] + draw_folders.sort(key=lambda x: int(x.name)) - # Expand dict column into columns - expanded = df["Time_Used_By_Cadre"].apply(pd.Series) + print(f"\nFound {len(draw_folders)} draw folders: {[d.name for d in draw_folders]}") - expanded["date"] = df["date"] + for draw_folder in draw_folders: + draw_num = int(draw_folder.name) + data_by_draw[draw_num] = {} - return expanded + # Find run folders (0, 1) + run_folders = [r for r in draw_folder.iterdir() if r.is_dir() and r.name.isdigit()] + run_folders.sort(key=lambda x: int(x.name)) + print(f"\nDraw {draw_num} - Found {len(run_folders)} run folders: {[r.name for r in run_folders]}") -def hcw_time_or_cost_used(df, return_time=True): - """ - Returns total minutes used per cadre (if return_time=True) - Otherwise returns cost. 
- """ - - column = "Time_Used_By_Cadre" if return_time else "Cost_By_Cadre" + for run_folder in run_folders: + run_num = int(run_folder.name) - if column not in df.columns: - return pd.DataFrame() + # Load all pickle files in this run folder + pickle_files = list(run_folder.glob("*.pickle")) - expanded = df[column].apply(pd.Series) - expanded["date"] = df["date"] + run_data = {} + for pickle_file in pickle_files: + try: + with open(pickle_file, 'rb') as f: + data = pickle.load(f) - return expanded + # Store by module name (filename without extension) + module_name = pickle_file.stem + run_data[module_name] = data + print(f" Loaded {module_name} from run {run_num}") + except Exception as e: + print(f" Error loading {pickle_file.name}: {e}") -# ============================================================================= -# MAIN ANALYSIS -# ============================================================================= + data_by_draw[draw_num][run_num] = run_data -def apply(results_folder: Path, output_folder: Path): + return data_by_draw - output_folder.mkdir(exist_ok=True, parents=True) - # --------------------------------------------------------------------- - # 1️⃣ CADRE COUNTS - # --------------------------------------------------------------------- - - cadre_counts = extract_results( - results_folder, - module="HealthSystem", - key="Current_Number_Of_Health_Workers_By_Cadre", - ) - - cadre_counts = compute_mean_across_runs(cadre_counts) - - cadre_counts["year"] = pd.to_datetime(cadre_counts["date"]).dt.year - cadre_counts = cadre_counts[cadre_counts["year"].between(START_YEAR, END_YEAR)] - - yearly_counts = cadre_counts.groupby("year").mean() +def extract_nurse_counts_from_run(run_data: Dict, target_years=range(2010, 2035)) -> pd.Series: + """ + Extract nurse counts from a single run's data. + Looking for the right data source - probably not number_of_hcw_staff directly. 
+ """ + # Look for healthsystem summary data + for module_name, data in run_data.items(): + if 'healthsystem.summary' in module_name: + if isinstance(data, dict): + print(f" Examining {module_name} - keys: {list(data.keys())}") + + # First, let's check what DataFrames are available + for key in data.keys(): + if isinstance(data[key], pd.DataFrame): + df = data[key] + print(f" DataFrame '{key}' has columns: {list(df.columns)}") + + # Check if this might have nurse count data + if 'date' in df.columns: + # Look for columns that might contain nurse counts + for col in df.columns: + if 'Nursing' in str(col) or 'Midwifery' in str(col) or 'staff' in str(col).lower(): + print(f" Found potential nurse column: {col}") + + # If we find a promising DataFrame, try to extract + if 'Capacity' in key or 'staff' in key.lower(): + df['year'] = pd.to_datetime(df['date']).dt.year + df_filtered = df[df['year'].isin(target_years)] + + if not df_filtered.empty: + # Look for nursing columns + nursing_cols = [col for col in df_filtered.columns + if 'Nursing' in str(col) or 'Midwifery' in str(col)] + + if nursing_cols: + # Sum across all nursing columns + result = df_filtered.groupby('year')[nursing_cols].sum().sum(axis=1) + print(f" Found nursing columns: {nursing_cols}") + print(f" Sample values: {result.head()}") + return result + + # If no nursing columns, look for staff columns + staff_cols = [col for col in df_filtered.columns + if 'staff' in str(col).lower() or 'count' in str(col).lower()] + + if staff_cols and len(staff_cols) > 0: + # Try to get the first staff column + result = df_filtered.groupby('year')[staff_cols[0]].mean() + print(f" Using staff column: {staff_cols[0]}") + print(f" Sample values: {result.head()}") + return result + + return pd.Series(dtype=float) + + +def extract_appointments_from_run(run_data: Dict, target_years=range(2010, 2035)) -> pd.Series: + """ + Extract appointments from a single run's data. 
+ """ + # Look for healthsystem summary data + for module_name, data in run_data.items(): + if 'healthsystem.summary' in module_name: + if isinstance(data, dict): + # Look for HSI_Event data + for key in ['HSI_Event', 'HSI_Event_non_blank_appt_footprint']: + if key in data: + df = data[key] + if isinstance(df, pd.DataFrame): + if 'date' in df.columns and 'Number_By_Appt_Type_Code' in df.columns: + df['year'] = pd.to_datetime(df['date']).dt.year - nurse_cols = [c for c in yearly_counts.columns if "Nurse" in c or "Midwife" in c] + # Filter to target years + df_filtered = df[df['year'].isin(target_years)] - plt.figure() - for col in nurse_cols: - plt.plot(yearly_counts.index, yearly_counts[col], label=col) + if not df_filtered.empty: + # Expand appointment counts + appts_expanded = df_filtered['Number_By_Appt_Type_Code'].apply(pd.Series) - plt.title("Yearly Nurse Cadre Counts (2010–2034)") - plt.xlabel("Year") - plt.ylabel("Number of Staff") - plt.legend() - plt.tight_layout() - plt.savefig(output_folder / "yearly_nurse_cadre_counts.png") - plt.close() + # Group by year and sum + appts_expanded['year'] = df_filtered['year'].values + yearly = appts_expanded.groupby('year').sum() - # --------------------------------------------------------------------- - # 2️⃣ MINUTES USED PER CADRE - # --------------------------------------------------------------------- + return yearly.sum(axis=1) - minutes_used = extract_results( - results_folder, - module="HealthSystem", - key="HSI_Event", - custom_generate_series=get_frac_of_hcw_time_used - ) + return pd.Series(dtype=float) - minutes_used = compute_mean_across_runs(minutes_used) - minutes_used["year"] = pd.to_datetime(minutes_used["date"]).dt.year - minutes_used = minutes_used[minutes_used["year"].between(START_YEAR, END_YEAR)] +def process_all_draws(data_by_draw: Dict, target_years=range(2010, 2035)): + """ + Process all draws to get nurse counts and appointments. + Returns DataFrames with draws as columns and years as index. 
+ """ + nurse_data = {} + appt_data = {} - yearly_minutes = minutes_used.groupby("year").sum() + for draw_num, run_data_dict in data_by_draw.items(): + draw_nurse_series = [] + draw_appt_series = [] - nurse_minutes_cols = [c for c in yearly_minutes.columns if "Nurse" in c] + for run_num, run_data in run_data_dict.items(): + print(f"\n Processing Draw {draw_num}, Run {run_num}") - plt.figure() - for col in nurse_minutes_cols: - plt.plot(yearly_minutes.index, yearly_minutes[col], label=col) + # Extract nurse counts for this run + nurse_series = extract_nurse_counts_from_run(run_data, target_years) + if not nurse_series.empty: + draw_nurse_series.append(nurse_series) + print(f" ✓ Found nurse data with years: {list(nurse_series.index)[:5]}...") + print(f" ✓ Sample values: {list(nurse_series.values)[:5]}...") - plt.title("Yearly Minutes Used by Nurse Cadres") - plt.xlabel("Year") - plt.ylabel("Minutes Used") - plt.legend() - plt.tight_layout() - plt.savefig(output_folder / "yearly_nurse_minutes_used.png") - plt.close() + # Extract appointments for this run + appt_series = extract_appointments_from_run(run_data, target_years) + if not appt_series.empty: + draw_appt_series.append(appt_series) + print(f" ✓ Found appointment data with years: {list(appt_series.index)[:5]}...") - # --------------------------------------------------------------------- - # 3️⃣ APPOINTMENTS DELIVERED - # --------------------------------------------------------------------- + # Average across runs for this draw + if draw_nurse_series: + # Convert list of Series to DataFrame and compute mean + nurse_df = pd.DataFrame(draw_nurse_series) + nurse_data[draw_num] = nurse_df.mean() + print(f" Draw {draw_num}: Averaged nurse data from {len(draw_nurse_series)} runs") - def extract_appointments(df): - if "Number_By_Appt_Type_Code" not in df.columns: - return pd.DataFrame() + if draw_appt_series: + appt_df = pd.DataFrame(draw_appt_series) + appt_data[draw_num] = appt_df.mean() + print(f" Draw {draw_num}: 
Averaged appointment data from {len(draw_appt_series)} runs") - expanded = df["Number_By_Appt_Type_Code"].apply(pd.Series) - expanded["date"] = df["date"] - return expanded + # Convert to DataFrames with draws as columns + nurse_df = pd.DataFrame(nurse_data) if nurse_data else pd.DataFrame() + appt_df = pd.DataFrame(appt_data) if appt_data else pd.DataFrame() - appts = extract_results( - results_folder, - module="HealthSystem", - key="HSI_Event", - custom_generate_series=extract_appointments - ) + return nurse_df, appt_df - appts = compute_mean_across_runs(appts) - appts["year"] = pd.to_datetime(appts["date"]).dt.year - appts = appts[appts["year"].between(START_YEAR, END_YEAR)] +# ============================================================================= +# PLOTTING FUNCTIONS +# ============================================================================= - yearly_appts = appts.groupby("year").sum() +def plot_nurse_counts(nurse_df, param_names, output_folder, target_period_str): + """ + Plot nurse counts over time for all scenarios. 
+ """ + if nurse_df.empty: + print("No nurse count data to plot") + return None, None + + fig, ax = plt.subplots(figsize=(14, 8)) + + # Define colors and line styles + colors = plt.cm.tab10(np.linspace(0, 1, 10)) + + # Use different markers to distinguish overlapping lines + markers = ['o', 's', '^', 'D', 'v', '<'] + + plots_made = False + + for draw_idx, scenario in enumerate(param_names): + if draw_idx in nurse_df.columns: + series = nurse_df[draw_idx] + + if series is not None and not series.empty: + plots_made = True + + # Determine label, color, and line style based on scenario name + if 'Baseline' in scenario: + level = 'Baseline' + elif 'Fewer' in scenario: + level = 'Fewer' + elif 'More' in scenario: + level = 'More' + else: + level = 'Unknown' + + if 'Default' in scenario: + hs_type = 'Default' + color = colors[0] # Blue for Default + else: # Improved + hs_type = 'Improved' + color = colors[1] # Orange for Improved + + # Line styles based on nurse level + if level == 'Baseline': + linestyle = '-' + elif level == 'Fewer': + linestyle = '--' + elif level == 'More': + linestyle = ':' + else: + linestyle = '-' + + label = f"{level} - {hs_type}" + + # Use different markers for each draw to see if lines are overlapping + marker = markers[draw_idx % len(markers)] + + ax.plot( + series.index, + series.values, + label=label, + color=color, + linestyle=linestyle, + marker=marker, + markersize=6, + markevery=3, + linewidth=2 + ) + print(f" ✓ Plotted Draw {draw_idx}: {label}") + + if not plots_made: + print(" No data to plot") + plt.close(fig) + return None, None + + ax.set_xlabel('Year', fontsize=12) + ax.set_ylabel('Number of Nurses', fontsize=12) + ax.set_title(f'Nurse Counts Over Time by Scenario ({target_period_str})', fontsize=14) + ax.grid(True, alpha=0.3) + ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=10) + + # Set x-ticks + all_years = [] + for col in nurse_df.columns: + all_years.extend(nurse_df[col].index) + + if all_years: + all_years = 
sorted(set(all_years)) + tick_years = all_years[::2] if len(all_years) > 10 else all_years + ax.set_xticks(tick_years) + ax.set_xticklabels(tick_years, rotation=45) + + fig.tight_layout() + + # Save figures + fig.savefig(output_folder / "nurse_counts_over_time.pdf", bbox_inches='tight') + fig.savefig(output_folder / "nurse_counts_over_time.png", bbox_inches='tight', dpi=300) + + return fig, ax + + +def plot_appointments(appt_df, param_names, output_folder, target_period_str): + """ + Plot appointments over time for all scenarios. + """ + if appt_df.empty: + print("No appointment data to plot") + return None, None + + fig, ax = plt.subplots(figsize=(14, 8)) + + # Define colors and line styles + colors = plt.cm.tab10(np.linspace(0, 1, 10)) + + # Use different markers to distinguish overlapping lines + markers = ['o', 's', '^', 'D', 'v', '<'] + + plots_made = False + + for draw_idx, scenario in enumerate(param_names): + if draw_idx in appt_df.columns: + series = appt_df[draw_idx] + + if series is not None and not series.empty: + plots_made = True + + # Determine label, color, and line style based on scenario name + if 'Baseline' in scenario: + level = 'Baseline' + elif 'Fewer' in scenario: + level = 'Fewer' + elif 'More' in scenario: + level = 'More' + else: + level = 'Unknown' + + if 'Default' in scenario: + hs_type = 'Default' + color = colors[0] # Blue for Default + else: # Improved + hs_type = 'Improved' + color = colors[1] # Orange for Improved + + # Line styles based on nurse level + if level == 'Baseline': + linestyle = '-' + elif level == 'Fewer': + linestyle = '--' + elif level == 'More': + linestyle = ':' + else: + linestyle = '-' + + label = f"{level} - {hs_type}" + + # Use different markers for each draw + marker = markers[draw_idx % len(markers)] + + # Convert to millions for plotting + values_millions = series.values / 1_000_000 + + ax.plot( + series.index, + values_millions, + label=label, + color=color, + linestyle=linestyle, + marker=marker, + 
if __name__ == "__main__":

    def _print_banner(title):
        """Print ``title`` framed by two 60-character ``=`` rules, preceded by a blank line."""
        rule = '=' * 60
        print(f"\n{rule}")
        print(title)
        print(rule)

    # ---- Command line -------------------------------------------------------
    parser = argparse.ArgumentParser(
        "Plot nurse counts and appointments from nurses scenario"
    )
    parser.add_argument(
        "--scenario-outputs-folder",
        type=Path,
        required=True,
        help="Path to folder containing scenario outputs",
    )
    # NOTE(review): both boolean flags are parsed, but only --show-figures is consulted below;
    # --save-figures is never read in this block.
    for flag, help_text in (
        ("--show-figures", "Whether to interactively show figures"),
        ("--save-figures", "Whether to save figures"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    results_folder = args.scenario_outputs_folder
    _print_banner(f"Loading results from: {results_folder}")

    # ---- Scenario names (one per draw, in draw order) -----------------------
    param_names = tuple(StaffingScenario()._scenarios.keys())
    print(f"\nFound {len(param_names)} scenarios:")
    for i, name in enumerate(param_names):
        print(f" {i}: {name}")

    # ---- Output location and reporting period -------------------------------
    output_folder = results_folder / "analysis_output"
    output_folder.mkdir(exist_ok=True)

    target_years = range(2010, 2035)
    target_period_str = "2010-2034"

    # ---- Load the raw logs and reduce them to two per-draw tables -----------
    _print_banner("MANUALLY LOADING DATA FROM FOLDER STRUCTURE")
    data_by_draw = load_data_manually(results_folder)

    _print_banner("EXTRACTING NURSE COUNTS AND APPOINTMENTS")
    nurse_df, appt_df = process_all_draws(data_by_draw, target_years)

    # ---- Report what was extracted ------------------------------------------
    _print_banner("EXTRACTION SUMMARY")

    if nurse_df.empty:
        print("\n✗ No nurse count data found")
    else:
        print(f"\n✓ Nurse count data shape: {nurse_df.shape}")
        print(f"Draws with nurse data: {list(nurse_df.columns)}")
        for col in nurse_df.columns:
            print(f" Draw {col}: years {list(nurse_df[col].index)[:5]}...")
            print(f" Values: {list(nurse_df[col].values)[:5]}...")

    if appt_df.empty:
        print("\n✗ No appointment data found")
    else:
        print(f"\n✓ Appointment data shape: {appt_df.shape}")
        print(f"Draws with appointment data: {list(appt_df.columns)}")
        for col in appt_df.columns:
            print(f" Draw {col}: years {list(appt_df[col].index)[:5]}...")

    # ---- Plots --------------------------------------------------------------
    _print_banner("GENERATING PLOTS")

    if nurse_df.empty:
        print("\n✗ Cannot plot nurse counts - no data available")
    else:
        print("\nPlotting nurse counts...")
        fig1, ax1 = plot_nurse_counts(nurse_df, param_names, output_folder, target_period_str)
        if fig1 is not None:
            print(f"✓ Nurse counts plot saved to {output_folder}/nurse_counts_over_time.png")

    if appt_df.empty:
        print("\n✗ Cannot plot appointments - no data available")
    else:
        print("\nPlotting appointments...")
        fig2, ax2 = plot_appointments(appt_df, param_names, output_folder, target_period_str)
        if fig2 is not None:
            print(f"✓ Appointments plot saved to {output_folder}/appointments_over_time.png")

    _print_banner("Analysis complete!")

    if args.show_figures:
        plt.show()
def drop_outside_period(_df, period=None):
    """Return only the rows of ``_df`` whose ``date`` lies within the reporting period.

    Rows are selected with a boolean mask rather than dropped by index label, so the
    result is correct even when the index contains repeated labels (dropping by label
    would also remove in-period rows that share a label with an out-of-period row).

    :param _df: DataFrame with a ``date`` column.
    :param period: optional ``(start, end)`` pair, both ends inclusive. Defaults to the
        module-level ``TARGET_PERIOD``.
    :return: a new DataFrame holding the in-period rows; ``_df`` itself is not modified.
    """
    start, end = TARGET_PERIOD if period is None else period
    within_period = _df['date'].between(start, end)
    return _df.loc[within_period].copy()
nurse_cols = [ + c for c in _df.columns + if "Officer_Nursing_and_Midwifery" in c + ] + + if len(nurse_cols) == 0: + return None + + nurse_df = _df[nurse_cols] + + # Mean usage across all nurse facilities + nurse_df = nurse_df.copy() + nurse_df.loc[:, "All"] = nurse_df.mean(axis=1) + # nurse_df["All"] = nurse_df.mean(axis=1) + + return nurse_df.resample("M").mean().stack() + + def get_share_of_time_used_for_each_officer_at_each_level(_df): + + _df = drop_outside_period(_df) + _df = _df.set_index("date") + + # Columns look like: + # clinic=GenericClinic|facID_and_officer=FacilityID_0_Officer_Nursing_and_Midwifery + + officer_cols = [ + c for c in _df.columns if "FacilityID_" in c and "Officer_" in c + ] + + if len(officer_cols) == 0: + return None + + officer_df = _df[officer_cols].copy() + + # Load Master Facility List + mfl = pd.read_csv( + Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") + ).set_index("Facility_ID") + + results = [] + + for col in officer_cols: + + col_string = str(col) + + # Extract facility ID + fac_match = re.search(r'FacilityID_(\d+)', col_string) + if fac_match is None: + continue + fid = int(fac_match.group(1)) + + # Extract cadre + officer_match = re.search(r'Officer_(.*)', col_string) + if officer_match is None: + continue + cadre = officer_match.group(1) + + # Get facility level + if fid not in mfl.index: + continue + + level = mfl.loc[fid, "Facility_Level"] + level = "2" if level == "1b" else level + + # Compute mean usage + mean_val = officer_df[col].mean() + + results.append((cadre, level, mean_val)) + + if len(results) == 0: + return None + + result_df = pd.DataFrame(results, columns=["Cadre", "Facility_Level", "Usage"]) + + return result_df.groupby(["Cadre", "Facility_Level"])["Usage"].mean() + + capacity_by_facility = summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_FacID_and_Officer', + 
custom_generate_series=get_share_of_time_for_hw_in_each_facility_by_short_treatment_id, + do_scaling=False + ), + only_mean=True, + collapse_columns=True + ) + + capacity_by_officer = summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_FacID_and_Officer', + custom_generate_series=get_share_of_time_used_for_each_officer_at_each_level, + do_scaling=False + ), + only_mean=True, + collapse_columns=True + ) + + # Find the levels of each facility + mfl = pd.read_csv( + resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv' + ).set_index('Facility_ID') + + def find_level_for_facility(col_name): + # Skip aggregated column + if col_name == "All": + return None + + match = re.search(r'FacilityID_(\d+)', str(col_name)) + + if match is None: + return None + + fid = int(match.group(1)) + + level = mfl.loc[fid, "Facility_Level"] + + return "2" if level == "1b" else level + + # def find_level_for_facility(col_tuple): + # # Extract the text part + # col_string = col_tuple[2] + # + # # Extract facility ID number + # match = re.search(r'FacilityID_(\d+)', col_string) + # fid = int(match.group(1)) + # + # level = mfl.loc[fid, "Facility_Level"] + # return "2" if level == "1b" else level + # def find_level_for_facility(id): + # return mfl.loc[id].Facility_Level if mfl.loc[id].Facility_Level != '1b' else '2' + # def find_level_for_facility(fid): + # level = mfl.loc[fid, "Facility_Level"] + # return "2" if level == "1b" else level + + color_for_level = {'0': 'blue', '1a': 'yellow', '1b': 'green', '2': 'grey', '3': 'orange', '4': 'black', + '5': 'white'} + + fig, ax = plt.subplots() + name_of_plot = 'Usage of Healthcare Worker Time By Month' + capacity_unstacked = capacity_by_facility.unstack() + for i in capacity_unstacked.columns: + + level = find_level_for_facility(i) + + if level is None: + continue + + h1, = ax.plot( + capacity_unstacked[i].index, + capacity_unstacked[i].values, + 
color=color_for_level[level], + linewidth=0.5, + label=f'Facility_Level {level}' + ) + # for i in capacity_unstacked.columns: + # if i != 'All': + # level = find_level_for_facility(i) + # h1, = ax.plot(capacity_unstacked[i].index, capacity_unstacked[i].values, + # color=color_for_level[level], linewidth=0.5, label=f'Facility_Level {level}') + + if 'All' in capacity_unstacked.columns: + h2, = ax.plot( + capacity_unstacked['All'].index, + capacity_unstacked['All'].values, + color='red', + linewidth=1.5 + ) + ax.legend([h1, h2], ['Each Facility', 'All Facilities']) + else: + ax.legend([h1], ['Each Facility']) + + ax.set_title(name_of_plot) + ax.set_xlabel('Month') + ax.set_ylabel('Fraction of all time used\n(Average for the month)') + + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = 'Usage of Healthcare Worker Time (Average)' + capacity_unstacked_average = capacity_by_facility.unstack().mean() + # levels = [find_level_for_facility(i) if i != 'All' else 'All' for i in capacity_unstacked_average.index] + xpos_for_level = dict(zip((color_for_level.keys()), range(len(color_for_level)))) + xpos_for_level.update({'1b': 2, '2': 2, '3': 3, '4': 4, '5': 5}) + for id, val in capacity_unstacked_average.items(): + if id != 'All': + _level = find_level_for_facility(id) + + # Skip if facility level could not be determined + if _level is None: + continue + + if _level != '5': + xpos = xpos_for_level[_level] + scatter = (np.random.rand() - 0.5) * 0.25 + h1, = ax.plot(xpos + scatter, val * 100, color=color_for_level[_level], + marker='.', markersize=15, label='Each Facility', linestyle='none') + if 'All' in capacity_unstacked_average.index: + h2 = ax.axhline( + y=capacity_unstacked_average['All'] * 100, + color='red', + linestyle='--', + label='Average' + ) + ax.set_title(name_of_plot) + 
def get_yearly_hr_count(_df, pop_scale=145.39609):
    """Return scaled yearly staff totals per cadre from the ``number_of_hcw_staff`` log.

    Each entry of the ``GenericClinic`` column is expanded as a mapping whose keys end in
    ``Officer_<cadre>``; values are summed over all keys of the same cadre and then over
    all log rows falling in the same calendar year.

    :param _df: logged DataFrame with a ``date`` column and a ``GenericClinic`` column.
        It is not modified.
    :param pop_scale: factor by which the yearly totals are multiplied. The default is the
        constant previously hard-coded in this function (presumably the population scaling
        factor of the run; confirm against the scenario's ``pop_size``).
    :return: Series indexed by (year, cadre), or ``None`` when ``GenericClinic`` is absent.
    """
    if 'GenericClinic' not in _df.columns:
        return None

    # Derive the year without writing a new column onto the caller's frame.
    years = _df['date'].dt.year.rename('year')

    # One column per "<facility>_Officer_<cadre>" key of the logged dictionaries.
    staff_df = _df['GenericClinic'].apply(pd.Series)

    # Keep only the cadre part of each column label.
    staff_df.columns = [c.split('Officer_')[-1] for c in staff_df.columns]

    # Sum facilities within cadre. Grouping on the transpose avoids `groupby(axis=1)`,
    # which is deprecated in recent pandas releases.
    staff_df = staff_df.T.groupby(level=0).sum().T

    # Sum within year, then scale.
    staff_df = staff_df.groupby(years).sum() * pop_scale

    # Stacked series: (year, cadre) -> value.
    return staff_df.stack()
def plot_staff_counts_by_cadre_across_scenarios(staff_counts_summary, output_folder):
    """Save one line chart per cadre comparing staff counts over time across scenarios.

    ``staff_counts_summary`` is read with columns keyed by (scenario, statistic), using the
    statistics "mean", "lower" and "upper", and with the cadre on index level 1. For every
    cadre a figure is written to ``output_folder`` as
    ``<cadre>_staff_counts_across_scenarios.png`` showing, per scenario, the mean line and
    a shaded lower-upper band whose lower edge is floored at zero.
    """
    all_scenarios = staff_counts_summary.columns.get_level_values(0).unique()
    all_cadres = staff_counts_summary.index.get_level_values(1).unique()

    for cadre in all_cadres:
        fig, ax = plt.subplots()

        for scenario in all_scenarios:
            # The three summary statistics for this scenario, restricted to the cadre.
            band = {
                stat: staff_counts_summary[(scenario, stat)].xs(cadre, level=1)
                for stat in ("mean", "lower", "upper")
            }
            x_years = band["mean"].index

            ax.plot(x_years, band["mean"].values, label=scenario)

            # Counts cannot be negative, so the band is not drawn below zero.
            floor_at_zero = np.maximum(band["lower"].values, 0)
            ax.fill_between(x_years, floor_at_zero, band["upper"].values, alpha=0.25)

        ax.set_title(f"{cadre} Staff Counts Across Scenarios")
        ax.set_xlabel("Year")
        ax.set_ylabel("Number of Health Workers")
        ax.legend()
        fig.tight_layout()

        target_file = output_folder / f"{cadre}_staff_counts_across_scenarios.png"
        fig.savefig(target_file)
        plt.close(fig)
if __name__ == "__main__":
    # Command-line entry point: run the analysis on one folder of scenario outputs and
    # write all products back into that same folder.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--scenario-outputs-folder",
        type=Path,
        required=True,
        help="Path to folder containing scenario outputs",
    )
    # NOTE(review): these two flags are accepted but not read anywhere in this block.
    for flag, help_text in (
        ("--show-figures", "Whether to interactively show figures"),
        ("--save-figures", "Whether to save figures"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    # The folder comes from the command line rather than a hard-coded path.
    results_folder = args.scenario_outputs_folder

    apply(
        results_folder=results_folder,
        output_folder=results_folder,
        resourcefilepath=Path('./resources'),
    )
def figure4_hr_use_overall(results_folder: Path, output_folder: Path, resourcefilepath: Path):
    """'Figure 4': The level of usage of the HealthSystem HR resources.

    Reads the ``Capacity_By_FacID_and_Officer`` entries of the
    ``tlo.methods.healthsystem.summary`` log in ``results_folder`` and writes three PNG
    files (prefixed ``Fig4_``) to ``output_folder``:

    * monthly usage of Nursing-and-Midwifery time, one line per facility plus the mean;
    * average usage per facility, scattered by facility level;
    * average usage by cadre and facility level, as a bar chart.

    :param results_folder: folder holding the scenario outputs.
    :param output_folder: folder into which the figures are saved.
    :param resourcefilepath: root of the resource files; the Master Facilities List is
        read from beneath it.
    """

    make_graph_file_name = lambda stub: output_folder / f"Fig4_{stub}.png"  # noqa: E731

    # Load the Master Facilities List once, from the caller-supplied resource folder, so
    # that the per-run helpers below neither depend on the working directory nor re-read
    # the file for every run.
    mfl = pd.read_csv(
        resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv'
    ).set_index('Facility_ID')

    def find_level_for_facility(col_name):
        """Return the facility level encoded in a column label ('1b' is reported as '2'),
        or None if the label carries no known facility ID (e.g. the aggregate "All")."""
        if col_name == "All":
            return None

        match = re.search(r'FacilityID_(\d+)', str(col_name))
        if match is None:
            return None

        fid = int(match.group(1))
        if fid not in mfl.index:
            # Unknown facility: skip it rather than fail with a KeyError.
            return None

        level = mfl.loc[fid, "Facility_Level"]
        return "2" if level == "1b" else level

    def get_share_of_time_for_hw_in_each_facility_by_short_treatment_id(_df):
        """Monthly mean usage of each Nursing-and-Midwifery column, plus an "All" column
        holding the row-wise mean across those columns."""
        _df = drop_outside_period(_df)
        _df = _df.set_index("date")

        nurse_cols = [c for c in _df.columns if "Officer_Nursing_and_Midwifery" in c]
        if not nurse_cols:
            return None

        nurse_df = _df[nurse_cols].copy()
        nurse_df["All"] = nurse_df.mean(axis=1)

        # NOTE(review): newer pandas releases prefer the "ME" alias for month-end; "M" is
        # kept here so that behaviour on the pandas version in use is unchanged.
        return nurse_df.resample("M").mean().stack()

    def get_share_of_time_used_for_each_officer_at_each_level(_df):
        """Mean usage by (cadre, facility level), averaged over time and then over the
        facilities of a level."""
        _df = drop_outside_period(_df)
        _df = _df.set_index("date")

        # Columns look like:
        # clinic=GenericClinic|facID_and_officer=FacilityID_0_Officer_Nursing_and_Midwifery
        officer_cols = [c for c in _df.columns if "FacilityID_" in c and "Officer_" in c]
        if not officer_cols:
            return None

        results = []
        for col in officer_cols:
            officer_match = re.search(r'Officer_(.*)', str(col))
            if officer_match is None:
                continue

            level = find_level_for_facility(col)
            if level is None:
                continue

            results.append((officer_match.group(1), level, _df[col].mean()))

        if not results:
            return None

        result_df = pd.DataFrame(results, columns=["Cadre", "Facility_Level", "Usage"])
        return result_df.groupby(["Cadre", "Facility_Level"])["Usage"].mean()

    capacity_by_facility = summarize(
        extract_results(
            results_folder,
            module='tlo.methods.healthsystem.summary',
            key='Capacity_By_FacID_and_Officer',
            custom_generate_series=get_share_of_time_for_hw_in_each_facility_by_short_treatment_id,
            do_scaling=False
        ),
        only_mean=True,
        collapse_columns=True
    )

    capacity_by_officer = summarize(
        extract_results(
            results_folder,
            module='tlo.methods.healthsystem.summary',
            key='Capacity_By_FacID_and_Officer',
            custom_generate_series=get_share_of_time_used_for_each_officer_at_each_level,
            do_scaling=False
        ),
        only_mean=True,
        collapse_columns=True
    )

    color_for_level = {'0': 'blue', '1a': 'yellow', '1b': 'green', '2': 'grey', '3': 'orange', '4': 'black',
                       '5': 'white'}

    # ---- Plot 1: usage by month, one line per facility ----------------------
    fig, ax = plt.subplots()
    name_of_plot = 'Usage of Healthcare Worker Time By Month'
    capacity_unstacked = capacity_by_facility.unstack()

    # Legend entries are collected as they are drawn, so the legend never refers to a
    # line that was not plotted.
    legend_handles, legend_labels = [], []
    facility_line = None
    for column in capacity_unstacked.columns:
        level = find_level_for_facility(column)
        if level is None:
            continue

        facility_line, = ax.plot(
            capacity_unstacked[column].index,
            capacity_unstacked[column].values,
            color=color_for_level[level],
            linewidth=0.5,
            label=f'Facility_Level {level}'
        )
    if facility_line is not None:
        legend_handles.append(facility_line)
        legend_labels.append('Each Facility')

    if 'All' in capacity_unstacked.columns:
        all_line, = ax.plot(
            capacity_unstacked['All'].index,
            capacity_unstacked['All'].values,
            color='red',
            linewidth=1.5
        )
        legend_handles.append(all_line)
        legend_labels.append('All Facilities')

    if legend_handles:
        ax.legend(legend_handles, legend_labels)

    ax.set_title(name_of_plot)
    ax.set_xlabel('Month')
    ax.set_ylabel('Fraction of all time used\n(Average for the month)')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    fig.tight_layout()
    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_')))
    plt.close(fig)

    # ---- Plot 2: average usage per facility, by facility level --------------
    fig, ax = plt.subplots()
    name_of_plot = 'Usage of Healthcare Worker Time (Average)'
    capacity_unstacked_average = capacity_by_facility.unstack().mean()
    xpos_for_level = dict(zip((color_for_level.keys()), range(len(color_for_level))))
    xpos_for_level.update({'1b': 2, '2': 2, '3': 3, '4': 4, '5': 5})

    legend_handles = []
    facility_marker = None
    for facility, val in capacity_unstacked_average.items():
        _level = find_level_for_facility(facility)

        # Skip the aggregate column, unknown facilities, and level 5.
        if _level is None or _level == '5':
            continue

        xpos = xpos_for_level[_level]
        scatter = (np.random.rand() - 0.5) * 0.25  # horizontal jitter so that markers do not overlap
        facility_marker, = ax.plot(xpos + scatter, val * 100, color=color_for_level[_level],
                                   marker='.', markersize=15, label='Each Facility', linestyle='none')
    if facility_marker is not None:
        legend_handles.append(facility_marker)

    if 'All' in capacity_unstacked_average.index:
        legend_handles.append(
            ax.axhline(
                y=capacity_unstacked_average['All'] * 100,
                color='red',
                linestyle='--',
                label='Average'
            )
        )

    ax.set_title(name_of_plot)
    ax.set_xlabel('Facility_Level')
    ax.set_xticks(list(xpos_for_level.values()))
    ax.set_xticklabels(xpos_for_level.keys())
    ax.set_ylabel('Percent of Time Available That is Used\n')
    if legend_handles:
        ax.legend(handles=legend_handles)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    fig.tight_layout()
    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_')))
    plt.close(fig)

    # ---- Plot 3: average usage by cadre and facility level ------------------
    fig, ax = plt.subplots()
    name_of_plot = 'Usage of Healthcare Worker Time by Cadre and Facility_Level'
    (100.0 * capacity_by_officer.unstack()).T.plot.bar(ax=ax)
    ax.legend()
    ax.set_xlabel('Facility_Level')
    ax.set_ylabel('Percent of time that is used')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title(name_of_plot)
    fig.tight_layout()
    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_')))
    plt.close(fig)
4 deletions(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv index 03b651c251..be1dc97f7f 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv @@ -4,7 +4,7 @@ DCSA,1,1,1,1,1,1,1 Dental,1,1,1,1,1,1,1 Laboratory,1,1,1,1,1,1,1 Mental,1,1,1,1,1,1,1 -Nursing_and_Midwifery,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2,-0.2 +Nursing_and_Midwifery,0.85,0.85,0.85,0.85,0.85,0.85,0.85 Nutrition,1,1,1,1,1,1,1 Pharmacy,1,1,1,1,1,1,1 Radiography,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv index 041cedcd6e..953c74107f 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv @@ -4,7 +4,7 @@ DCSA,1,1,1,1,1,1,1 Dental,1,1,1,1,1,1,1 Laboratory,1,1,1,1,1,1,1 Mental,1,1,1,1,1,1,1 -Nursing_and_Midwifery,2,2,2,2,2,2,2 +Nursing_and_Midwifery,1.455,1.455,1.455,1.455,1.455,1.455,1.455 Nutrition,1,1,1,1,1,1,1 Pharmacy,1,1,1,1,1,1,1 Radiography,1,1,1,1,1,1,1 diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 3da5335ed9..5202c29c8f 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ 
b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -36,10 +36,10 @@ def __init__(self): self.seed = 0 self.start_date = Date(2010, 1, 1) self.end_date = Date(2035, 1, 1) - self.pop_size = 200 + self.pop_size = 100000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 2 + self.runs_per_draw = 5 def log_configuration(self): return { From 2befeab5f77f6caaa5ccdae74fefabbe83ff9a4a Mon Sep 17 00:00:00 2001 From: thewati Date: Mon, 20 Apr 2026 11:16:15 +0200 Subject: [PATCH 21/52] comment out unused --- src/scripts/nurses_analyses/analysis_time_and_appts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/nurses_analyses/analysis_time_and_appts.py b/src/scripts/nurses_analyses/analysis_time_and_appts.py index aaa5f0d0c6..a7c22e69a9 100644 --- a/src/scripts/nurses_analyses/analysis_time_and_appts.py +++ b/src/scripts/nurses_analyses/analysis_time_and_appts.py @@ -18,7 +18,7 @@ extract_results, get_coarse_appt_type, get_color_short_treatment_id, - load_pickled_dataframes, + # load_pickled_dataframes, order_of_short_treatment_ids, plot_stacked_bar_chart, squarify_neat, From 2413c7cb7031feba3870378b35d0d8b7ba100875 Mon Sep 17 00:00:00 2001 From: thewati Date: Wed, 13 May 2026 09:14:46 +0200 Subject: [PATCH 22/52] deaths, staff counts and in districts --- .../analysis_nurses_scenario.py | 2 +- .../analysis_staff_num_more_districts.py | 531 ++++++++++++++++++ 2 files changed, 532 insertions(+), 1 deletion(-) create mode 100644 src/scripts/nurses_analyses/analysis_staff_num_more_districts.py diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario.py b/src/scripts/nurses_analyses/analysis_nurses_scenario.py index 9cf9f71a60..8f8e84ea5c 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario.py @@ -157,7 +157,7 @@ def plot_summarized_deaths_by_age(deaths_summarized_by_age): # results_folder = 
def drop_outside_period(_df, period=None):
    """Return only the rows of ``_df`` whose ``date`` lies within the reporting period.

    Rows are selected with a boolean mask rather than dropped by index label, so the
    result is correct even when the index contains repeated labels (dropping by label
    would also remove in-period rows that share a label with an out-of-period row).

    :param _df: DataFrame with a ``date`` column.
    :param period: optional ``(start, end)`` pair, both ends inclusive. Defaults to the
        module-level ``TARGET_PERIOD``.
    :return: a new DataFrame holding the in-period rows; ``_df`` itself is not modified.
    """
    start, end = TARGET_PERIOD if period is None else period
    within_period = _df['date'].between(start, end)
    return _df.loc[within_period].copy()
get_share_of_time_for_hw_in_each_facility_by_short_treatment_id(_df): + + _df = drop_outside_period(_df) + _df = _df.set_index("date") + + nurse_cols = [ + c for c in _df.columns + if "Officer_Nursing_and_Midwifery" in c + ] + + if len(nurse_cols) == 0: + return None + + nurse_df = _df[nurse_cols] + + # Mean usage across all nurse facilities + nurse_df = nurse_df.copy() + nurse_df.loc[:, "All"] = nurse_df.mean(axis=1) + # nurse_df["All"] = nurse_df.mean(axis=1) + + return nurse_df.resample("M").mean().stack() + + def get_share_of_time_used_for_each_officer_at_each_level(_df): + + _df = drop_outside_period(_df) + _df = _df.set_index("date") + + # Columns look like: + # clinic=GenericClinic|facID_and_officer=FacilityID_0_Officer_Nursing_and_Midwifery + + officer_cols = [ + c for c in _df.columns if "FacilityID_" in c and "Officer_" in c + ] + + if len(officer_cols) == 0: + return None + + officer_df = _df[officer_cols].copy() + + # Load Master Facility List + mfl = pd.read_csv( + Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") + ).set_index("Facility_ID") + + results = [] + + for col in officer_cols: + + col_string = str(col) + + # Extract facility ID + fac_match = re.search(r'FacilityID_(\d+)', col_string) + if fac_match is None: + continue + fid = int(fac_match.group(1)) + + # Extract cadre + officer_match = re.search(r'Officer_(.*)', col_string) + if officer_match is None: + continue + cadre = officer_match.group(1) + + # Get facility level + if fid not in mfl.index: + continue + + level = mfl.loc[fid, "Facility_Level"] + level = "2" if level == "1b" else level + + # Compute mean usage + mean_val = officer_df[col].mean() + + results.append((cadre, level, mean_val)) + + if len(results) == 0: + return None + + result_df = pd.DataFrame(results, columns=["Cadre", "Facility_Level", "Usage"]) + + return result_df.groupby(["Cadre", "Facility_Level"])["Usage"].mean() + + capacity_by_facility = summarize( + extract_results( + 
results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_FacID_and_Officer', + custom_generate_series=get_share_of_time_for_hw_in_each_facility_by_short_treatment_id, + do_scaling=False + ), + only_mean=True, + collapse_columns=True + ) + + capacity_by_officer = summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_FacID_and_Officer', + custom_generate_series=get_share_of_time_used_for_each_officer_at_each_level, + do_scaling=False + ), + only_mean=True, + collapse_columns=True + ) + + # Find the levels of each facility + mfl = pd.read_csv( + resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv' + ).set_index('Facility_ID') + + def find_level_for_facility(col_name): + # Skip aggregated column + if col_name == "All": + return None + + match = re.search(r'FacilityID_(\d+)', str(col_name)) + + if match is None: + return None + + fid = int(match.group(1)) + + level = mfl.loc[fid, "Facility_Level"] + + return "2" if level == "1b" else level + + # def find_level_for_facility(col_tuple): + # # Extract the text part + # col_string = col_tuple[2] + # + # # Extract facility ID number + # match = re.search(r'FacilityID_(\d+)', col_string) + # fid = int(match.group(1)) + # + # level = mfl.loc[fid, "Facility_Level"] + # return "2" if level == "1b" else level + # def find_level_for_facility(id): + # return mfl.loc[id].Facility_Level if mfl.loc[id].Facility_Level != '1b' else '2' + # def find_level_for_facility(fid): + # level = mfl.loc[fid, "Facility_Level"] + # return "2" if level == "1b" else level + + color_for_level = {'0': 'blue', '1a': 'yellow', '1b': 'green', '2': 'grey', '3': 'orange', '4': 'black', + '5': 'white'} + + fig, ax = plt.subplots() + name_of_plot = 'Usage of Healthcare Worker Time By Month' + capacity_unstacked = capacity_by_facility.unstack() + for i in capacity_unstacked.columns: + + level = find_level_for_facility(i) + + if level is 
None: + continue + + h1, = ax.plot( + capacity_unstacked[i].index, + capacity_unstacked[i].values, + color=color_for_level[level], + linewidth=0.5, + label=f'Facility_Level {level}' + ) + # for i in capacity_unstacked.columns: + # if i != 'All': + # level = find_level_for_facility(i) + # h1, = ax.plot(capacity_unstacked[i].index, capacity_unstacked[i].values, + # color=color_for_level[level], linewidth=0.5, label=f'Facility_Level {level}') + + if 'All' in capacity_unstacked.columns: + h2, = ax.plot( + capacity_unstacked['All'].index, + capacity_unstacked['All'].values, + color='red', + linewidth=1.5 + ) + ax.legend([h1, h2], ['Each Facility', 'All Facilities']) + else: + ax.legend([h1], ['Each Facility']) + + ax.set_title(name_of_plot) + ax.set_xlabel('Month') + ax.set_ylabel('Fraction of all time used\n(Average for the month)') + + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = 'Usage of Healthcare Worker Time (Average)' + capacity_unstacked_average = capacity_by_facility.unstack().mean() + # levels = [find_level_for_facility(i) if i != 'All' else 'All' for i in capacity_unstacked_average.index] + xpos_for_level = dict(zip((color_for_level.keys()), range(len(color_for_level)))) + xpos_for_level.update({'1b': 2, '2': 2, '3': 3, '4': 4, '5': 5}) + for id, val in capacity_unstacked_average.items(): + if id != 'All': + _level = find_level_for_facility(id) + + # Skip if facility level could not be determined + if _level is None: + continue + + if _level != '5': + xpos = xpos_for_level[_level] + scatter = (np.random.rand() - 0.5) * 0.25 + h1, = ax.plot(xpos + scatter, val * 100, color=color_for_level[_level], + marker='.', markersize=15, label='Each Facility', linestyle='none') + if 'All' in capacity_unstacked_average.index: + h2 = ax.axhline( + y=capacity_unstacked_average['All'] * 100, + 
color='red', + linestyle='--', + label='Average' + ) + ax.set_title(name_of_plot) + ax.set_xlabel('Facility_Level') + ax.set_xticks(list(xpos_for_level.values())) + ax.set_xticklabels(xpos_for_level.keys()) + ax.set_ylabel('Percent of Time Available That is Used\n') + ax.legend(handles=[h1, h2]) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) + plt.close(fig) + + fig, ax = plt.subplots() + name_of_plot = 'Usage of Healthcare Worker Time by Cadre and Facility_Level' + (100.0 * capacity_by_officer.unstack()).T.plot.bar(ax=ax) + ax.legend() + ax.set_xlabel('Facility_Level') + ax.set_ylabel('Percent of time that is used') + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.set_title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) + plt.close(fig) + + +def get_yearly_hr_count(_df): + + if 'GenericClinic' not in _df.columns: + return None + + years = _df['date'].dt.year.rename("year") + + # Expand facility dictionary + staff_df = _df['GenericClinic'].apply(pd.Series) + + # Extract facility IDs + facility_ids = [ + int(c.split("FacilityID_")[1].split("_")[0]) + for c in staff_df.columns + ] + + # Extract cadre names + cadres = [ + c.split("Officer_")[-1] + for c in staff_df.columns + ] + + # Load Master Facility List + mfl = pd.read_csv( + Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") + ).set_index("Facility_ID") + + # Map facilities to districts + districts = [ + mfl.loc[fid, "District"] if fid in mfl.index else "Unknown" + for fid in facility_ids + ] + + # Create MultiIndex columns + staff_df.columns = pd.MultiIndex.from_arrays( + [districts, cadres], + names=["District", "Cadre"] + ) + + # Sum yearly + staff_df = staff_df.groupby(years).sum() + + # Sum facilities within district/cadre + staff_df = staff_df.T.groupby(level=[0, 
1]).sum().T + + # POP_SCALE = 145.39609 + # staff_df = staff_df * POP_SCALE + + # Convert columns to index + return staff_df.stack([0, 1]) + + +def extract_staff_counts(results_folder): + return extract_results( + results_folder, + module="tlo.methods.healthsystem.summary", + key="number_of_hcw_staff", + custom_generate_series=get_yearly_hr_count, + do_scaling=False + ) + + +def set_param_names_as_column_index_level_0(_df, param_names): + """Set column index level 0 (draw numbers) to scenario names.""" + ordered_param_names = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ + ordered_param_names.get(col) + for col in _df.columns.levels[0] + ] + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + +def plot_staff_counts_by_cadre_across_scenarios_by_district( + staff_counts_summary, + output_folder +): + + scenario_names = staff_counts_summary.columns.get_level_values(0).unique() + + districts = ( + staff_counts_summary.index + .get_level_values("District") + .unique() + ) + + cadres = ( + staff_counts_summary.index + .get_level_values("Cadre") + .unique() + ) + + for district in districts: + + district_df = staff_counts_summary.xs( + district, + level="District" + ) + + for cadre in cadres: + + if cadre not in district_df.index.get_level_values("Cadre"): + continue + + fig, ax = plt.subplots() + + for scenario in scenario_names: + + central = district_df[(scenario, "mean")].xs( + cadre, + level="Cadre" + ) + + lower = district_df[(scenario, "lower")].xs( + cadre, + level="Cadre" + ) + + upper = district_df[(scenario, "upper")].xs( + cadre, + level="Cadre" + ) + + years = central.index + + ax.plot( + years, + central.values, + label=scenario + ) + + ax.fill_between( + years, + np.maximum(lower.values, 0), + upper.values, + alpha=0.25 + ) + + ax.set_title( + f"{cadre} Staff Counts Across Scenarios ({district})" + ) + + ax.set_xlabel("Year") + ax.set_ylabel("Average Number of Health Workers") + + ax.legend() + + 
fig.tight_layout() + + fig.savefig( + output_folder / + f"{district}_{cadre}_staff_counts_across_scenarios.png" + ) + + plt.close(fig) + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): + """Description of the usage of healthcare system resources.""" + + # figure2_appointments_used( + # results_folder=results_folder, output_folder=output_folder, resourcefilepath=resourcefilepath + # ) + log = load_pickled_dataframes(results_folder, 0, 0) + print(log.keys()) + + print(log['tlo.methods.healthsystem.summary'].keys()) + + # STEP 1: extract staff counts + staff_counts = extract_staff_counts(results_folder) + + # STEP 2: rename draws to scenario names + param_names = tuple(StaffingScenario()._scenarios.keys()) + + staff_counts = staff_counts.pipe( + set_param_names_as_column_index_level_0, + param_names=param_names + ) + + # STEP 3: summarize runs + print(type(staff_counts)) + print(staff_counts.head()) + staff_counts_summary = summarize(staff_counts) + + print("\n=== Staff counts summary ===") + print(staff_counts_summary.index.names) + + print("\n=== Staff counts from 2025–2034 ===") + + # Select years 2025–2034 + years_to_check = range(2025, 2035) + + export_df = staff_counts_summary.reset_index() + + # Filter the years + export_df = export_df[export_df["year"].isin(years_to_check)] + + # Save to Excel + export_path = output_folder / "debug_staff_counts_2025_2034.xlsx" + export_df.to_excel(export_path) + + print(f"Staff counts exported to: {export_path}") + + # STEP 4: plot + plot_staff_counts_by_cadre_across_scenarios_by_district( + staff_counts_summary, + output_folder + ) + + figure4_hr_use_overall( + results_folder=results_folder, output_folder=output_folder, resourcefilepath=resourcefilepath + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--scenario-outputs-folder", + type=Path, + required=True, + help="Path to folder containing scenario outputs", + ) + 
parser.add_argument( + "--show-figures", + action="store_true", + help="Whether to interactively show figures", + ) + parser.add_argument( + "--save-figures", + action="store_true", + help="Whether to save figures", + ) + args = parser.parse_args() + + # Use the command-line argument instead of hardcoded path + results_folder = args.scenario_outputs_folder + # results_folder = Path( + # './outputs/wamulwafu@kuhes.ac.mw/nurses_scenario_outputs-2026-02-09T110530Z' + # ) + + apply( + results_folder=results_folder, # or directly: args.scenario_outputs_folder + output_folder=results_folder, + resourcefilepath=Path('./resources') + ) From 7dac1f3984e5cfd80a2743e608ceaabe2e57f167 Mon Sep 17 00:00:00 2001 From: thewati Date: Thu, 4 Jun 2026 09:22:10 +0200 Subject: [PATCH 23/52] dalys and deaths --- .../analysis_nurses_scenario.py | 20 +- .../analysis_nurses_scenario_dalys.py | 2343 +++++++++++++++++ .../analysis_staff_num_more.py | 551 ++-- 3 files changed, 2538 insertions(+), 376 deletions(-) create mode 100644 src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario.py b/src/scripts/nurses_analyses/analysis_nurses_scenario.py index 8f8e84ea5c..5ae3146295 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario.py @@ -166,9 +166,15 @@ def plot_summarized_deaths_by_age(deaths_summarized_by_age): scenario_info = get_scenario_info(results_folder) # Get scenario names directly from Scenario class - param_names = tuple(StaffingScenario()._scenarios.keys()) + # Keep only scenarios with Default Healthsystem Function + default_hs_scenarios = [ + "Baseline Nurses / Default Healthsystem Function", + "Fewer Nurses / Default Healthsystem Function", + "More Nurses / Default Healthsystem Function", + ] + # Total deaths total_deaths = extract_total_deaths(results_folder).pipe( set_param_names_as_column_index_level_0, @@ -177,6 +183,12 @@ def 
plot_summarized_deaths_by_age(deaths_summarized_by_age): summarized_total_deaths = summarize(total_deaths) + # Filter to Default Healthsystem Function scenarios only + summarized_total_deaths = summarized_total_deaths.loc[ + :, + summarized_total_deaths.columns.get_level_values(0).isin(default_hs_scenarios) + ] + fig_1, ax_1 = plot_summarized_total_deaths(summarized_total_deaths) # Deaths by age @@ -187,6 +199,12 @@ def plot_summarized_deaths_by_age(deaths_summarized_by_age): summarized_deaths_by_age = summarize(deaths_by_age) + # Filter to Default Healthsystem Function scenarios only + summarized_deaths_by_age = summarized_deaths_by_age.loc[ + :, + summarized_deaths_by_age.columns.get_level_values(0).isin(default_hs_scenarios) + ] + fig_2, ax_2 = plot_summarized_deaths_by_age(summarized_deaths_by_age) if args.show_figures: diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py new file mode 100644 index 0000000000..ba22c44da6 --- /dev/null +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py @@ -0,0 +1,2343 @@ +"""Plot DALYs across nurse staffing scenarios. + +This script produces two figures for the Default Healthsystem Function scenarios only: + +1. Annual DALYs by year (three lines): + - Baseline Nurses / Default Healthsystem Function + - Fewer Nurses / Default Healthsystem Function + - More Nurses / Default Healthsystem Function + +2. 
Percent of DALYs averted compared to Baseline + (total between 2027 and 2034): + - More Nurses + - Fewer Nurses +""" + +import argparse +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from scripts.nurses_analyses.nurses_scenario_analyses import StaffingScenario +from tlo.analysis.utils import ( + extract_results, + load_pickled_dataframes, + summarize, +) + + +# ----------------------------------------------------------------------------- +# Helper function: rename draw numbers to scenario names +# ----------------------------------------------------------------------------- +def set_param_names_as_column_index_level_0(_df, param_names): + """Set column index level 0 (draw numbers) to scenario names.""" + ordered_param_names = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ + ordered_param_names.get(col) + for col in _df.columns.levels[0] + ] + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + +# ----------------------------------------------------------------------------- +# Extract annual DALYs +# ----------------------------------------------------------------------------- +def extract_annual_dalys(results_folder): + def get_num_dalys_yearly(df: pd.DataFrame) -> pd.Series: + """Return total DALYs for each year.""" + # Sum all cause columns after removing metadata columns + yearly = ( + df.drop(columns=["date", "sex", "age_range"], errors="ignore") + .groupby("year") + .sum() + .sum(axis=1) + ) + return yearly + + return extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=get_num_dalys_yearly, + do_scaling=True, + ) + + +# Extract annual Deaths +def extract_annual_deaths(results_folder): + def get_num_deaths_yearly(df: pd.DataFrame) -> pd.Series: + """Return total deaths for each year.""" + + yearly = ( + df.assign(year=df["date"].dt.year) + .groupby("year")["person_id"] + .count() + ) 
+ + return yearly + + return extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_num_deaths_yearly, + do_scaling=True, + ) + + +# ----------------------------------------------------------------------------- +# Plot 1: Annual DALYs over time +# ----------------------------------------------------------------------------- +def plot_annual_dalys(summarized_annual_dalys): + fig, ax = plt.subplots(figsize=(10, 6)) + + scenario_names = summarized_annual_dalys.columns.get_level_values(0).unique() + + # Short labels for legend + label_map = { + "Baseline Nurses / Default Healthsystem Function": "Baseline", + "Fewer Nurses / Default Healthsystem Function": "Fewer nurses", + "More Nurses / Default Healthsystem Function": "More nurses", + + "Baseline Nurses / Improved Healthsystem Function": "Baseline", + "Fewer Nurses / Improved Healthsystem Function": "Fewer nurses", + "More Nurses / Improved Healthsystem Function": "More nurses", + } + + for scenario in scenario_names: + years = summarized_annual_dalys.index.astype(int) + means = summarized_annual_dalys[(scenario, "mean")].values + lowers = summarized_annual_dalys[(scenario, "lower")].values + uppers = summarized_annual_dalys[(scenario, "upper")].values + + print(means.min(), means.max()) + + ax.plot( + years, + means, + linewidth=2, + label=label_map.get(scenario, scenario), + ) + + ax.fill_between( + years, + lowers, + uppers, + alpha=0.2, + ) + + ax.set_xlabel("Year") + ax.set_ylabel("Annual DALYs") + ax.legend() + ax.grid(alpha=0.3) + ax.set_xlim(2025, 2034) + ax.set_ylim(bottom=8e6) + # ax.set_ylim(bottom=0.8) + fig.tight_layout() + + return fig, ax + + +# Plot: Annual Deaths over time +def plot_annual_deaths(summarized_annual_deaths): + fig, ax = plt.subplots(figsize=(10, 6)) + + scenario_names = ( + summarized_annual_deaths.columns + .get_level_values(0) + .unique() + ) + + label_map = { + "Baseline Nurses / Default Healthsystem Function": "Baseline", + 
"Fewer Nurses / Default Healthsystem Function": "Fewer nurses", + "More Nurses / Default Healthsystem Function": "More nurses", + + "Baseline Nurses / Improved Healthsystem Function": "Baseline", + "Fewer Nurses / Improved Healthsystem Function": "Fewer nurses", + "More Nurses / Improved Healthsystem Function": "More nurses", + } + + for scenario in scenario_names: + years = summarized_annual_deaths.index.astype(int) + + means = summarized_annual_deaths[ + (scenario, "mean") + ].values + + lowers = summarized_annual_deaths[ + (scenario, "lower") + ].values + + uppers = summarized_annual_deaths[ + (scenario, "upper") + ].values + + ax.plot( + years, + means, + linewidth=2, + label=label_map.get(scenario, scenario), + ) + + ax.fill_between( + years, + lowers, + uppers, + alpha=0.2, + ) + + ax.set_xlabel("Year") + ax.set_ylabel("Annual deaths") + + ax.legend() + + ax.grid(alpha=0.3) + + ax.set_xlim(2025, 2034) + + fig.tight_layout() + + return fig, ax + + +# Extract deaths by cause +def extract_deaths_by_cause(results_folder): + def get_deaths_by_cause(df: pd.DataFrame) -> pd.Series: + """ + Return deaths by cause aggregated across 2027–2034. 
+ """ + + # Add year + df = df.assign(year=df["date"].dt.year) + + # Restrict years + df = df[df["year"].between(2027, 2034)] + + # CHANGE THIS if your column name differs + cause_col = "cause" + + deaths_by_cause = ( + df.groupby(cause_col)["person_id"] + .count() + ) + + return deaths_by_cause + + return extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_deaths_by_cause, + do_scaling=True, + ) + + +# Extract deaths by age group +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# Extract deaths by age group +# ----------------------------------------------------------------------------- +def extract_deaths_by_age_group(results_folder): + + def get_deaths_by_age_group(df: pd.DataFrame) -> pd.Series: + """ + Return deaths by age group aggregated across 2027–2034. + """ + + # --------------------------------------------------------- + # Add year + # --------------------------------------------------------- + df = df.assign(year=df["date"].dt.year) + + # Restrict years + df = df[df["year"].between(2027, 2034)] + + # --------------------------------------------------------- + # Create age groups + # --------------------------------------------------------- + age_bins = [ + 0, 5, 10, 15, 20, 25, 30, 35, + 40, 45, 50, 55, 60, 65, 70, + 75, 80, np.inf + ] + + age_labels = [ + "0-4", + "5-9", + "10-14", + "15-19", + "20-24", + "25-29", + "30-34", + "35-39", + "40-44", + "45-49", + "50-54", + "55-59", + "60-64", + "65-69", + "70-74", + "75-79", + "80+", + ] + + df["age_group"] = pd.cut( + df["age"], + bins=age_bins, + labels=age_labels, + right=False, + ) + + # --------------------------------------------------------- + # Aggregate deaths by age group + # --------------------------------------------------------- + deaths_by_age = ( + df.groupby("age_group")["person_id"] + .count() + ) + + return 
deaths_by_age + + return extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_deaths_by_age_group, + do_scaling=True, + ) + + +# Extract DALYs by cause +def extract_dalys_by_cause(results_folder): + def get_dalys_by_cause(df: pd.DataFrame) -> pd.Series: + """ + Return DALYs by cause aggregated across 2027–2034. + """ + + # Add year + df = df.assign(year=df["date"].dt.year) + + # Restrict years + df = df[df["year"].between(2027, 2034)] + + # Remove metadata columns + metadata_cols = [ + "date", + "sex", + "age_range", + "year", + ] + + cause_cols = [ + c for c in df.columns + if c not in metadata_cols + ] + + # Sum DALYs for each cause + return df[cause_cols].sum() + + return extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=get_dalys_by_cause, + do_scaling=True, + ) + + +# ----------------------------------------------------------------------------- +# Extract DALYs by age group +# ----------------------------------------------------------------------------- +def extract_dalys_by_age_group(results_folder): + + def get_dalys_by_age_group(df: pd.DataFrame) -> pd.Series: + """ + Return DALYs by age group aggregated across 2027–2034. 
+ """ + + # Add year + df = df.assign(year=df["date"].dt.year) + + # Restrict years + df = df[df["year"].between(2027, 2034)] + + # Metadata columns to exclude + metadata_cols = [ + "date", + "sex", + "age_range", + "year", + ] + + # DALY cause columns + cause_cols = [ + c for c in df.columns + if c not in metadata_cols + ] + + # Sum DALYs across causes first + df["total_dalys"] = df[cause_cols].sum(axis=1) + + # Aggregate by age group + dalys_by_age = ( + df.groupby("age_range")["total_dalys"] + .sum() + ) + + return dalys_by_age + + return extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=get_dalys_by_age_group, + do_scaling=True, + ) + + +# ----------------------------------------------------------------------------- +# Plot 2: Percent DALYs averted relative to baseline (2027–2034) +# ----------------------------------------------------------------------------- +def calculate_percent_dalys_averted( + summarized_annual_dalys, + baseline_scenario, + comparison_years=range(2027, 2035), +): + """ + Calculate % DALYs averted relative to baseline. 
+ + Returns DataFrame with: + mean + lower + upper + """ + + years = summarized_annual_dalys.index.astype(int) + year_mask = np.isin(years, list(comparison_years)) + + scenario_names = summarized_annual_dalys.columns.get_level_values(0).unique() + + results = {} + + # Baseline totals + baseline_mean = ( + summarized_annual_dalys[(baseline_scenario, "mean")] + .values[year_mask] + .sum() + ) + + baseline_lower = ( + summarized_annual_dalys[(baseline_scenario, "lower")] + .values[year_mask] + .sum() + ) + + baseline_upper = ( + summarized_annual_dalys[(baseline_scenario, "upper")] + .values[year_mask] + .sum() + ) + + for scenario in scenario_names: + + if scenario == baseline_scenario: + continue + + scenario_mean = ( + summarized_annual_dalys[(scenario, "mean")] + .values[year_mask] + .sum() + ) + + scenario_lower = ( + summarized_annual_dalys[(scenario, "lower")] + .values[year_mask] + .sum() + ) + + scenario_upper = ( + summarized_annual_dalys[(scenario, "upper")] + .values[year_mask] + .sum() + ) + + mean_averted = ( + (baseline_mean - scenario_mean) + / baseline_mean + * 100.0 + ) + + lower_averted = ( + (baseline_lower - scenario_upper) + / baseline_lower + * 100.0 + ) + + upper_averted = ( + (baseline_upper - scenario_lower) + / baseline_upper + * 100.0 + ) + + results[scenario] = { + "mean": mean_averted, + "lower": lower_averted, + "upper": upper_averted, + } + + return pd.DataFrame(results).T + + +def calculate_percent_deaths_averted( + summarized_annual_deaths, + baseline_scenario, + comparison_years=range(2027, 2035), +): + years = summarized_annual_deaths.index.astype(int) + + year_mask = np.isin(years, list(comparison_years)) + + scenario_names = ( + summarized_annual_deaths.columns + .get_level_values(0) + .unique() + ) + + results = {} + + baseline_mean = ( + summarized_annual_deaths[ + (baseline_scenario, "mean") + ] + .values[year_mask] + .sum() + ) + + baseline_lower = ( + summarized_annual_deaths[ + (baseline_scenario, "lower") + ] + 
.values[year_mask] + .sum() + ) + + baseline_upper = ( + summarized_annual_deaths[ + (baseline_scenario, "upper") + ] + .values[year_mask] + .sum() + ) + + for scenario in scenario_names: + + if scenario == baseline_scenario: + continue + + scenario_mean = ( + summarized_annual_deaths[ + (scenario, "mean") + ] + .values[year_mask] + .sum() + ) + + scenario_lower = ( + summarized_annual_deaths[ + (scenario, "lower") + ] + .values[year_mask] + .sum() + ) + + scenario_upper = ( + summarized_annual_deaths[ + (scenario, "upper") + ] + .values[year_mask] + .sum() + ) + + mean_averted = ( + (baseline_mean - scenario_mean) + / baseline_mean + * 100.0 + ) + + lower_averted = ( + (baseline_lower - scenario_upper) + / baseline_lower + * 100.0 + ) + + upper_averted = ( + (baseline_upper - scenario_lower) + / baseline_upper + * 100.0 + ) + + results[scenario] = { + "mean": mean_averted, + "lower": lower_averted, + "upper": upper_averted, + } + + return pd.DataFrame(results).T + + +# Calculate % DALYs averted by cause +def calculate_percent_dalys_averted_by_cause( + summarized_dalys_by_cause, + baseline_scenario, +): + """ + Returns DataFrame indexed by cause with columns: + more_nurses + fewer_nurses + """ + + scenario_names = ( + summarized_dalys_by_cause.columns + .get_level_values(0) + .unique() + ) + + baseline = summarized_dalys_by_cause[ + (baseline_scenario, "mean") + ] + + results = pd.DataFrame(index=baseline.index) + + for scenario in scenario_names: + + if scenario == baseline_scenario: + continue + + scenario_values = summarized_dalys_by_cause[ + (scenario, "mean") + ] + + percent_averted = ( + (baseline - scenario_values) + / baseline + * 100.0 + ) + + if "More Nurses" in scenario: + results["More nurses"] = percent_averted + + elif "Fewer Nurses" in scenario: + # Make negative for mirrored plotting + results["Fewer nurses"] = -percent_averted + + return results + + +# ----------------------------------------------------------------------------- +# Calculate % 
deaths averted by cause +# ----------------------------------------------------------------------------- +def calculate_percent_deaths_averted_by_cause( + summarized_deaths_by_cause, + baseline_scenario, +): + """ + Returns DataFrame indexed by cause with columns: + More nurses + Fewer nurses + """ + + scenario_names = ( + summarized_deaths_by_cause.columns + .get_level_values(0) + .unique() + ) + + baseline = summarized_deaths_by_cause[ + (baseline_scenario, "mean") + ] + + results = pd.DataFrame(index=baseline.index) + + for scenario in scenario_names: + if scenario == baseline_scenario: + continue + + scenario_values = summarized_deaths_by_cause[ + (scenario, "mean") + ] + + percent_averted = ( + (baseline - scenario_values) + / baseline + * 100.0 + ) + if "More Nurses" in scenario: + results["More nurses"] = percent_averted + + elif "Fewer Nurses" in scenario: + results["Fewer nurses"] = -percent_averted + + return results + + +# ----------------------------------------------------------------------------- +# Calculate % DALYs averted by age group +# ----------------------------------------------------------------------------- +def calculate_percent_dalys_averted_by_age_group( + summarized_dalys_by_age, + baseline_scenario, +): + + scenario_names = ( + summarized_dalys_by_age.columns + .get_level_values(0) + .unique() + ) + + results = {} + + baseline_mean = summarized_dalys_by_age[ + (baseline_scenario, "mean") + ] + + baseline_lower = summarized_dalys_by_age[ + (baseline_scenario, "lower") + ] + + baseline_upper = summarized_dalys_by_age[ + (baseline_scenario, "upper") + ] + + for scenario in scenario_names: + + if scenario == baseline_scenario: + continue + + scenario_mean = summarized_dalys_by_age[ + (scenario, "mean") + ] + + scenario_lower = summarized_dalys_by_age[ + (scenario, "lower") + ] + + scenario_upper = summarized_dalys_by_age[ + (scenario, "upper") + ] + + mean_averted = ( + (baseline_mean - scenario_mean) + / baseline_mean + * 100.0 + ) + + 
lower_averted = ( + (baseline_lower - scenario_upper) + / baseline_lower + * 100.0 + ) + + upper_averted = ( + (baseline_upper - scenario_lower) + / baseline_upper + * 100.0 + ) + + print(mean_averted.describe()) + + print("\n", scenario) + print("mean:") + print(mean_averted.head()) + + if "Fewer Nurses" in scenario: + positive_values = mean_averted[mean_averted > 0] + + print("\nPOSITIVE VALUES IN FEWER NURSES:") + print(positive_values) + + print("\nNUMBER OF POSITIVE AGE GROUPS:") + print(len(positive_values)) + + results[scenario] = pd.DataFrame({ + "mean": mean_averted, + "lower": lower_averted, + "upper": upper_averted, + }) + + return results + + +# Calculate % deaths averted by age group +def calculate_percent_deaths_averted_by_age_group( + summarized_deaths_by_age, + baseline_scenario, +): + """ + Returns DataFrame indexed by age group with columns: + More nurses + Fewer nurses + """ + + scenario_names = ( + summarized_deaths_by_age.columns + .get_level_values(0) + .unique() + ) + + baseline = summarized_deaths_by_age[ + (baseline_scenario, "mean") + ] + + results = pd.DataFrame(index=baseline.index) + + for scenario in scenario_names: + + if scenario == baseline_scenario: + continue + + scenario_values = summarized_deaths_by_age[ + (scenario, "mean") + ] + + percent_averted = np.where( + baseline > 0, + (baseline - scenario_values) + / baseline + * 100.0, + np.nan, + ) + + if "More Nurses" in scenario: + results["More nurses"] = percent_averted + + elif "Fewer Nurses" in scenario: + results["Fewer nurses"] = -percent_averted + + return results + +# ----------------------------------------------------------------------------- +# District-level plot: % DALYs averted compared to baseline (2027–2034) +# ----------------------------------------------------------------------------- + +# def extract_annual_dalys_by_district(results_folder): +# """ +# Extract annual DALYs by district. 
def _first_logged_frame(logged):
    """Return a DataFrame to inspect from a logged object, or None if there is none.

    A DataFrame is returned unchanged. For a non-empty dict, the first value is
    returned when it is a DataFrame. Anything else yields None.
    """
    if isinstance(logged, pd.DataFrame):
        return logged
    if isinstance(logged, dict) and logged:
        first_value = next(iter(logged.values()))
        if isinstance(first_value, pd.DataFrame):
            return first_value
    return None


def _columns_matching(columns, keywords):
    """Return the columns whose name contains any keyword (case-insensitive).

    Each column is reported at most once, in its original order, even when it
    matches several keywords. Column labels are converted with ``str`` so that
    non-string labels do not raise.
    """
    lowered_keywords = {kw.lower() for kw in keywords}
    return [
        col for col in columns
        if any(kw in str(col).lower() for kw in lowered_keywords)
    ]


def _print_columns_and_head(frame):
    """Print the column names and first rows of a DataFrame."""
    print("Columns:")
    print(frame.columns.tolist())
    print("\nHEAD:")
    print(frame.head())


def find_facility_level_data(results_folder):
    """Print a summary of every entry logged by the HealthBurden module.

    Diagnostic helper: for each key under "tlo.methods.healthburden" it prints
    the object's type and, for DataFrames (or dicts whose first value is a
    DataFrame), the columns and first rows. Returns None.
    """

    from tlo.analysis.utils import load_pickled_dataframes

    log = load_pickled_dataframes(results_folder)

    print("\n" + "=" * 60)
    print("Inspecting HealthBurden outputs...")
    print("=" * 60)

    healthburden_data = log.get("tlo.methods.healthburden", {})

    for key_name, obj in healthburden_data.items():

        print(f"\nKEY: {key_name}")
        print("-" * 50)

        # Best-effort inspection: a failure on one key is reported and must
        # not stop the remaining keys from being inspected.
        try:
            print(f"TYPE: {type(obj)}")

            if isinstance(obj, pd.DataFrame):
                print("DataFrame detected")
                _print_columns_and_head(obj)

            elif isinstance(obj, dict):
                print(f"DICT KEYS: {list(obj.keys())[:5]}")

                if not obj:
                    print("(empty dict - nothing to inspect)")
                    continue

                first_obj = next(iter(obj.values()))
                print(f"FIRST OBJECT TYPE: {type(first_obj)}")

                if isinstance(first_obj, pd.DataFrame):
                    _print_columns_and_head(first_obj)
                else:
                    print(first_obj)

            else:
                print(obj)

        except Exception as e:
            print(f"ERROR: {e}")


def check_all_dalys_columns(results_folder):
    """Check every DALY-related key for any facility/district columns.

    Prints, for each DALY key present in the HealthBurden log, the number of
    columns, a sample of the column names, and any column whose name contains
    a facility/district keyword. Returns None.
    """
    from tlo.analysis.utils import load_pickled_dataframes

    log = load_pickled_dataframes(results_folder)
    healthburden = log['tlo.methods.healthburden']

    daly_keys = ['dalys', 'dalys_stacked', 'dalys_stacked_by_age_and_time',
                 'dalys_by_wealth_stacked_by_age_and_time']

    facility_keywords = ['facility', 'district', 'clinic']

    for key in daly_keys:
        if key not in healthburden:
            continue

        print(f"\n{'=' * 50}")
        print(f"Checking: {key}")
        print('=' * 50)

        # NOTE(review): the previous code indexed the logged object with [0].
        # Whether each key holds a DataFrame or a dict of DataFrames is not
        # visible from this file, so both shapes are accepted here.
        sample = _first_logged_frame(healthburden[key])
        if sample is None:
            print("No DataFrame found under this key")
            continue

        all_columns = sample.columns.tolist()

        print(f"Total columns: {len(all_columns)}")
        print(f"Sample columns: {all_columns[:15]}...")

        found = _columns_matching(all_columns, facility_keywords)

        if found:
            print(f"\n✓ FOUND facility/district columns: {found}")
        else:
            print("\n❌ No facility or district columns found")


def check_death_columns(results_folder):
    """Inspect death log columns for district/location information.

    Prints all columns of the demography "death" log and the subset whose name
    contains a location-like keyword. Returns None.
    """

    from tlo.analysis.utils import load_pickled_dataframes

    log = load_pickled_dataframes(results_folder)

    death_log = log["tlo.methods.demography"]["death"]

    print("\n" + "=" * 60)
    print("CHECKING DEATH LOG COLUMNS")
    print("=" * 60)

    sample = _first_logged_frame(death_log)
    if sample is None:
        print("No death DataFrame available to inspect")
        return

    print(sample.columns.tolist())

    keywords = ["district", "facility", "region", "location"]
    found = _columns_matching(sample.columns, keywords)

    print("\nPossible district/location columns:")
    print(found)
    print("\nPossible death columns:")
    print(sample.columns.tolist())


def inspect_population_log(results_folder):
    """Print the structure of the demography "population" log.

    Shows the logged object's type, its keys when it is a dict, and - when a
    DataFrame can be obtained - its columns, first rows and any column whose
    name suggests a location. Returns None.
    """
    from tlo.analysis.utils import load_pickled_dataframes

    log = load_pickled_dataframes(results_folder)

    demography = log["tlo.methods.demography"]

    print("\n" + "=" * 60)
    print("INSPECTING POPULATION LOG")
    print("=" * 60)

    population_obj = demography["population"]

    print(f"\nTYPE: {type(population_obj)}")

    if isinstance(population_obj, dict):
        print("\nDICT KEYS:")
        print(population_obj.keys())

        if population_obj:
            print(f"\nFIRST RUN KEY: {next(iter(population_obj))}")

    pop_df = _first_logged_frame(population_obj)

    print(f"\nOBJECT TYPE: {type(pop_df)}")

    if pop_df is None:
        # Nothing tabular to inspect; the column scan below would fail.
        print("No population DataFrame available to inspect")
        return

    print("\nCOLUMNS:")
    print(pop_df.columns.tolist())

    print("\nHEAD:")
    print(pop_df.head())

    print("\nPOSSIBLE LOCATION COLUMNS:")

    location_cols = _columns_matching(
        pop_df.columns,
        ["district", "region", "facility", "location", "residence"],
    )

    print(location_cols)
+# ) +# +# # --------------------------------------------------------------------- +# # At this point we are guaranteed to have a MultiIndex: +# # level 0 = year +# # level 1 = district +# # --------------------------------------------------------------------- +# districts = ( +# summarized_annual_dalys_by_district.index +# .get_level_values(1) +# .unique() +# ) +# +# scenario_names = ( +# summarized_annual_dalys_by_district.columns +# .get_level_values(0) +# .unique() +# ) +# +# comparison_scenarios = [ +# s for s in scenario_names +# if s != baseline_scenario +# ] +# +# # Results DataFrame +# results = pd.DataFrame( +# index=districts, +# columns=comparison_scenarios, +# dtype=float, +# ) +# +# # --------------------------------------------------------------------- +# # Compute % DALYs averted for each district +# # --------------------------------------------------------------------- +# for district in districts: +# +# # Select all years for this district +# district_df = summarized_annual_dalys_by_district.xs( +# district, +# level=1 +# ) +# +# # Keep only years in comparison period +# district_df = district_df.loc[ +# district_df.index.isin(comparison_years) +# ] +# +# # Skip if no data +# if district_df.empty: +# continue +# +# # Baseline DALYs total +# baseline_total = district_df[ +# (baseline_scenario, "mean") +# ].sum() +# +# # Avoid divide by zero +# if baseline_total == 0: +# continue +# +# # Comparison scenarios +# for scenario in comparison_scenarios: +# scenario_total = district_df[ +# (scenario, "mean") +# ].sum() +# +# percent_averted = ( +# (baseline_total - scenario_total) +# / baseline_total +# * 100.0 +# ) +# +# results.loc[district, scenario] = percent_averted +# +# # Remove districts with all missing values +# results = results.dropna(how="all") +# +# # Sort alphabetically +# results = results.sort_index() +# +# return results + + +# def plot_percent_dalys_averted_by_district(percent_averted_by_district): +# """ +# Create horizontal 
# Age groups in ascending order, as used on the y-axis of the age-group plots.
_AGE_GROUP_ORDER = [
    "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
    "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79",
    "80+",
]

# Vertical offset and thickness of the paired horizontal bars.
_BAR_OFFSET = 0.2
_BAR_HEIGHT = 0.35


def _plot_percent_averted_bars(percent_averted, ylabel):
    """Draw one bar per nurse scenario (mean with lower/upper error bars).

    "More Nurses" scenarios are drawn first, then "Fewer Nurses" scenarios;
    other rows of ``percent_averted`` are ignored.
    """
    fig, ax = plt.subplots(figsize=(7, 6))

    index = percent_averted.index
    ordered_scenarios = (
        [s for s in index if "More Nurses" in s]
        + [s for s in index if "Fewer Nurses" in s]
    )
    labels = [
        "More nurses" if "More Nurses" in s else "Fewer nurses"
        for s in ordered_scenarios
    ]

    means = percent_averted.loc[ordered_scenarios, "mean"].values
    lowers = percent_averted.loc[ordered_scenarios, "lower"].values
    uppers = percent_averted.loc[ordered_scenarios, "upper"].values

    # Matplotlib expects error-bar lengths, not interval end points.
    yerr = np.vstack([means - lowers, uppers - means])

    ax.bar(labels, means, width=0.45, yerr=yerr, capsize=6)
    ax.axhline(0, color="black", linewidth=1)
    ax.set_ylabel(ylabel)
    ax.grid(axis="y", alpha=0.3)

    fig.tight_layout()

    return fig, ax


def _add_shared_legend(fig, axes):
    """Add one legend below the panels and lay the figure out around it."""
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="lower center", ncol=2, frameon=False)
    # Reserve the bottom strip so the legend does not sit on the x-axis label.
    fig.tight_layout(rect=(0, 0.05, 1, 1))


def _plot_top_causes_panels(
    default_df, improved_df, top_n, xlabel, suptitle, colors
):
    """Draw paired More/Fewer nurses bars for the top causes, in two panels.

    Causes are ranked by the absolute "More nurses" value in ``default_df``;
    the same causes, in the same order, are shown in both panels.
    """
    ranking = default_df["More nurses"].abs().sort_values(ascending=False)
    top_causes = ranking.head(top_n).index.tolist()

    # Reverse so the largest appears at the top. reindex (rather than .loc)
    # keeps the panels aligned even if a cause is absent from improved_df.
    panels = [
        (default_df.loc[top_causes].iloc[::-1], "Default Healthsystem"),
        (improved_df.reindex(top_causes).iloc[::-1], "Improved Healthsystem"),
    ]

    fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=True)

    for ax, (df, title) in zip(axes, panels):
        y = np.arange(len(df))

        # The two series are offset and coloured differently so that neither
        # can hide the other when both fall on the same side of zero.
        ax.barh(
            y - _BAR_OFFSET,
            df["More nurses"],
            height=_BAR_HEIGHT,
            color=colors[0],
            label="More nurses",
        )
        ax.barh(
            y + _BAR_OFFSET,
            df["Fewer nurses"],
            height=_BAR_HEIGHT,
            color=colors[1],
            label="Fewer nurses",
        )

        ax.axvline(0, color="black", linewidth=1)
        ax.set_yticks(y)
        ax.set_yticklabels(df.index)
        ax.set_xlabel(xlabel)
        ax.set_title(title)
        ax.grid(axis="x", alpha=0.3)

    fig.suptitle(suptitle)
    _add_shared_legend(fig, axes)

    return fig, axes


def plot_percent_dalys_averted(percent_averted):
    """Bar chart of % DALYs averted vs. baseline for the nurse scenarios.

    :param percent_averted: DataFrame indexed by scenario name with "mean",
        "lower" and "upper" columns.
    :return: (fig, ax)
    """
    return _plot_percent_averted_bars(
        percent_averted,
        "% DALYs averted compared to Baseline\n"
        "(total between 2027 and 2034)",
    )


def plot_percent_deaths_averted(percent_averted):
    """Bar chart of % deaths averted vs. baseline for the nurse scenarios.

    :param percent_averted: DataFrame indexed by scenario name with "mean",
        "lower" and "upper" columns.
    :return: (fig, ax)
    """
    return _plot_percent_averted_bars(
        percent_averted,
        "% deaths averted compared to Baseline\n"
        "(total between 2027 and 2034)",
    )


# Plot % DALYs averted by cause
def plot_percent_dalys_averted_by_cause(
    default_df,
    improved_df,
    top_n=10,
):
    """Two-panel horizontal bar chart of % DALYs averted for the top causes.

    :param default_df: DataFrame indexed by cause with "More nurses" and
        "Fewer nurses" columns (Default Healthsystem); also defines the ranking.
    :param improved_df: same layout, for the Improved Healthsystem.
    :param top_n: number of causes to show.
    :return: (fig, axes)
    """
    return _plot_top_causes_panels(
        default_df,
        improved_df,
        top_n,
        xlabel="% DALYs averted",
        suptitle="% DALYs averted by causes on national level\n(2027–2034)",
        colors=("steelblue", "lightsteelblue"),
    )


# Plot % deaths averted by cause
def plot_percent_deaths_averted_by_cause(
    default_df,
    improved_df,
    top_n=10,
):
    """Two-panel horizontal bar chart of % deaths averted for the top causes.

    :param default_df: DataFrame indexed by cause with "More nurses" and
        "Fewer nurses" columns (Default Healthsystem); also defines the ranking.
    :param improved_df: same layout, for the Improved Healthsystem.
    :param top_n: number of causes to show.
    :return: (fig, axes)
    """
    return _plot_top_causes_panels(
        default_df,
        improved_df,
        top_n,
        xlabel="% deaths averted",
        suptitle="% deaths averted by causes on national level\n(2027–2034)",
        colors=("indianred", "lightcoral"),
    )


# Plot % DALYs averted by age group
# -----------------------------------------------------------------------------
def plot_percent_dalys_averted_by_age_group(
    default_df,
    improved_df,
):
    """Two-panel chart of % DALYs averted by age group, with intervals.

    :param default_df: mapping from scenario name to a DataFrame indexed by
        age group with "mean", "lower" and "upper" columns; must contain the
        More/Fewer Nurses "Default Healthsystem Function" scenarios.
    :param improved_df: same, for the "Improved Healthsystem Function"
        scenarios.
    :return: (fig, axes)
    """

    def _ordered(frame):
        # Canonical age order, reversed so the oldest ages appear at the top.
        return frame.reindex(_AGE_GROUP_ORDER).iloc[::-1]

    panel_data = [
        (
            _ordered(default_df["More Nurses / Default Healthsystem Function"]),
            _ordered(default_df["Fewer Nurses / Default Healthsystem Function"]),
            "Default Healthsystem",
        ),
        (
            _ordered(improved_df["More Nurses / Improved Healthsystem Function"]),
            _ordered(improved_df["Fewer Nurses / Improved Healthsystem Function"]),
            "Improved Healthsystem",
        ),
    ]

    fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=True)

    for ax, (more, fewer, title) in zip(axes, panel_data):
        y = np.arange(len(more))

        series = [
            (more, y - _BAR_OFFSET, "steelblue", "More Nurses"),
            (fewer, y + _BAR_OFFSET, "indianred", "Fewer Nurses"),
        ]

        for frame, positions, color, label in series:
            ax.barh(
                positions,
                frame["mean"],
                height=_BAR_HEIGHT,
                color=color,
                label=label,
            )
            # Interval drawn as error bars around the mean.
            ax.errorbar(
                frame["mean"],
                positions,
                xerr=[
                    frame["mean"] - frame["lower"],
                    frame["upper"] - frame["mean"],
                ],
                fmt="none",
                color="black",
                capsize=3,
            )

        ax.axvline(0, color="black")

        ax.set_yticks(y)
        ax.set_yticklabels(more.index)

        ax.set_xlabel("% DALYs averted")
        ax.set_title(title)

        ax.grid(axis="x", alpha=0.3)

    fig.suptitle(
        "% DALYs averted by age group on national level\n(2027–2034)"
    )

    _add_shared_legend(fig, axes)

    return fig, axes
# -----------------------------------------------------------------------------
# Plot % deaths averted by age group
# -----------------------------------------------------------------------------
def plot_percent_deaths_averted_by_age_group(
    default_df,
    improved_df,
):
    """Two-panel horizontal bar chart of % deaths averted by age group.

    :param default_df: DataFrame indexed by age group with "More nurses" and
        "Fewer nurses" columns (Default Healthsystem).
    :param improved_df: same layout, for the Improved Healthsystem.
    :return: (fig, axes)
    """
    age_order = [
        "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
        "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74",
        "75-79", "80+",
    ]

    def _prepare(frame):
        # Canonical age order, drop age groups with no data at all, then
        # reverse so the oldest ages appear at the top.
        return frame.reindex(age_order).dropna(how="all").iloc[::-1]

    panel_data = [
        (_prepare(default_df), "Default Healthsystem"),
        (_prepare(improved_df), "Improved Healthsystem"),
    ]

    # The panels may keep different age groups after dropna, so the y-axis
    # is not shared.
    fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=False)

    for ax, (df, title) in zip(axes, panel_data):
        y = np.arange(len(df))

        # Offset and colour the two series differently so that neither can
        # hide the other when both fall on the same side of zero.
        ax.barh(
            y - 0.2,
            df["More nurses"],
            height=0.35,
            color="indianred",
            label="More nurses",
        )
        ax.barh(
            y + 0.2,
            df["Fewer nurses"],
            height=0.35,
            color="lightcoral",
            label="Fewer nurses",
        )

        ax.axvline(0, color="black", linewidth=1)
        ax.set_yticks(y)
        ax.set_yticklabels(df.index)
        ax.set_xlabel("% deaths averted")
        ax.set_title(title)
        ax.grid(axis="x", alpha=0.3)

    fig.suptitle(
        "% deaths averted by age group on national level\n(2027–2034)"
    )

    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="lower center", ncol=2, frameon=False)
    # Reserve the bottom strip so the legend does not sit on the x-axis label.
    fig.tight_layout(rect=(0, 0.05, 1, 1))

    return fig, axes


# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
if __name__ == "__main__":

    # The text must be passed as `description`: the first positional argument
    # of ArgumentParser is `prog`, which would replace the program name in
    # the usage/help output.
    parser = argparse.ArgumentParser(
        description="Analyse DALYs across nurse staffing scenarios"
    )
    parser.add_argument(
        "--scenario-outputs-folder",
        type=Path,
        required=True,
        help="Path to folder containing scenario outputs",
    )
    parser.add_argument(
        "--show-figures",
        action="store_true",
        help="Whether to interactively show figures",
    )
    parser.add_argument(
        "--save-figures",
        action="store_true",
        help="Whether to save figures to results folder",
    )
    args = parser.parse_args()

    results_folder = args.scenario_outputs_folder

    # Get scenario names from scenario class
    param_names = tuple(StaffingScenario()._scenarios.keys())

    print("\nPARAM NAMES:")
    print(param_names)

    # Main analysis: Default Healthsystem Function scenarios
    baseline_scenario = "Baseline Nurses / Default Healthsystem Function"
    default_hs_scenarios = [
        baseline_scenario,
        "Fewer Nurses / Default Healthsystem Function",
        "More Nurses / Default Healthsystem Function",
    ]

    # Sensitivity analysis: Improved Healthsystem Function scenarios
    baseline_improved_scenario = (
        "Baseline Nurses / Improved Healthsystem Function"
    )
    improved_hs_scenarios = [
        baseline_improved_scenario,
        "Fewer Nurses / Improved Healthsystem Function",
        "More Nurses / Improved Healthsystem Function",
    ]

    comparison_years = range(2027, 2035)  # 2027 to 2034 inclusive

    def _summarize_by_scenario(extractor):
        """Run an extractor on the results folder and summarize across runs."""
        extracted = extractor(results_folder).pipe(
            set_param_names_as_column_index_level_0,
            param_names=param_names,
        )
        return summarize(extracted)

    def _keep_scenarios(summary, scenarios):
        """Keep only the columns belonging to the given scenario names."""
        wanted = summary.columns.get_level_values(0).isin(scenarios)
        return summary.loc[:, wanted]

    # -------------------------------------------------------------------------
    # Annual DALYs
    # -------------------------------------------------------------------------
    summarized_annual_dalys = _summarize_by_scenario(extract_annual_dalys)

    summarized_annual_dalys_default = _keep_scenarios(
        summarized_annual_dalys, default_hs_scenarios
    )
    summarized_annual_dalys_improved = _keep_scenarios(
        summarized_annual_dalys, improved_hs_scenarios
    )

    print("\nALL DALY SCENARIOS:")
    print(
        summarized_annual_dalys.columns.get_level_values(0).unique().tolist()
    )

    print("\nFILTERED IMPROVED DALY SCENARIOS:")
    print(
        summarized_annual_dalys_improved.columns.get_level_values(0).unique().tolist()
    )

    # Plot 1: Annual DALYs over time
    fig_1, ax_1 = plot_annual_dalys(summarized_annual_dalys_default)

    # Plot 2: Percent DALYs averted relative to baseline (2027–2034)
    percent_averted = calculate_percent_dalys_averted(
        summarized_annual_dalys_default,
        baseline_scenario=baseline_scenario,
        comparison_years=comparison_years,
    )

    fig_2, ax_2 = plot_percent_dalys_averted(percent_averted)

    # Sensitivity analysis: DALYs under Improved Healthsystem Function
    fig_5, ax_5 = plot_annual_dalys(summarized_annual_dalys_improved)

    percent_averted_improved = calculate_percent_dalys_averted(
        summarized_annual_dalys_improved,
        baseline_scenario=baseline_improved_scenario,
        comparison_years=comparison_years,
    )

    print("\nPERCENT DALYS AVERTED IMPROVED:")
    print(percent_averted_improved)

    fig_6, ax_6 = plot_percent_dalys_averted(percent_averted_improved)

    # NOTE: the district-level DALY breakdown is currently disabled.

    # -------------------------------------------------------------------------
    # Annual deaths
    # -------------------------------------------------------------------------
    summarized_annual_deaths = _summarize_by_scenario(extract_annual_deaths)

    summarized_annual_deaths_default = _keep_scenarios(
        summarized_annual_deaths, default_hs_scenarios
    )
    summarized_annual_deaths_improved = _keep_scenarios(
        summarized_annual_deaths, improved_hs_scenarios
    )

    # Plot annual deaths
    fig_3, ax_3 = plot_annual_deaths(summarized_annual_deaths_default)

    # Plot % deaths averted
    percent_deaths_averted = calculate_percent_deaths_averted(
        summarized_annual_deaths_default,
        baseline_scenario=baseline_scenario,
        comparison_years=comparison_years,
    )

    fig_4, ax_4 = plot_percent_deaths_averted(percent_deaths_averted)

    # Sensitivity analysis: deaths under Improved Healthsystem Function
    fig_7, ax_7 = plot_annual_deaths(summarized_annual_deaths_improved)

    percent_deaths_averted_improved = calculate_percent_deaths_averted(
        summarized_annual_deaths_improved,
        baseline_scenario=baseline_improved_scenario,
        comparison_years=comparison_years,
    )

    print("\nPERCENT DEATHS AVERTED IMPROVED:")
    print(percent_deaths_averted_improved)

    fig_8, ax_8 = plot_percent_deaths_averted(percent_deaths_averted_improved)

    # -------------------------------------------------------------------------
    # Deaths by cause
    # -------------------------------------------------------------------------
    summarized_deaths_by_cause = _summarize_by_scenario(extract_deaths_by_cause)

    percent_deaths_by_cause_default = calculate_percent_deaths_averted_by_cause(
        _keep_scenarios(summarized_deaths_by_cause, default_hs_scenarios),
        baseline_scenario=baseline_scenario,
    )

    percent_deaths_by_cause_improved = calculate_percent_deaths_averted_by_cause(
        _keep_scenarios(summarized_deaths_by_cause, improved_hs_scenarios),
        baseline_scenario=baseline_improved_scenario,
    )

    fig_10, ax_10 = plot_percent_deaths_averted_by_cause(
        percent_deaths_by_cause_default,
        percent_deaths_by_cause_improved,
        top_n=10,
    )

    # -------------------------------------------------------------------------
    # Deaths by age group
    # -------------------------------------------------------------------------
    summarized_deaths_by_age_group = _summarize_by_scenario(
        extract_deaths_by_age_group
    )

    percent_deaths_by_age_default = calculate_percent_deaths_averted_by_age_group(
        _keep_scenarios(summarized_deaths_by_age_group, default_hs_scenarios),
        baseline_scenario=baseline_scenario,
    )

    percent_deaths_by_age_improved = calculate_percent_deaths_averted_by_age_group(
        _keep_scenarios(summarized_deaths_by_age_group, improved_hs_scenarios),
        baseline_scenario=baseline_improved_scenario,
    )

    fig_12, ax_12 = plot_percent_deaths_averted_by_age_group(
        percent_deaths_by_age_default,
        percent_deaths_by_age_improved,
    )

    # -------------------------------------------------------------------------
    # DALYs by cause
    # -------------------------------------------------------------------------
    summarized_dalys_by_cause = _summarize_by_scenario(extract_dalys_by_cause)

    percent_by_cause_default = calculate_percent_dalys_averted_by_cause(
        _keep_scenarios(summarized_dalys_by_cause, default_hs_scenarios),
        baseline_scenario=baseline_scenario,
    )

    percent_by_cause_improved = calculate_percent_dalys_averted_by_cause(
        _keep_scenarios(summarized_dalys_by_cause, improved_hs_scenarios),
        baseline_scenario=baseline_improved_scenario,
    )

    fig_9, ax_9 = plot_percent_dalys_averted_by_cause(
        percent_by_cause_default,
        percent_by_cause_improved,
        top_n=10,
    )

    # -------------------------------------------------------------------------
    # DALYs by age group
    # -------------------------------------------------------------------------
    summarized_dalys_by_age_group = _summarize_by_scenario(
        extract_dalys_by_age_group
    )

    percent_dalys_by_age_default = calculate_percent_dalys_averted_by_age_group(
        _keep_scenarios(summarized_dalys_by_age_group, default_hs_scenarios),
        baseline_scenario=baseline_scenario,
    )

    percent_dalys_by_age_improved = calculate_percent_dalys_averted_by_age_group(
        _keep_scenarios(summarized_dalys_by_age_group, improved_hs_scenarios),
        baseline_scenario=baseline_improved_scenario,
    )

    print("\nDEFAULT AGE GROUP OBJECT:")
    print(type(percent_dalys_by_age_default))
    print(percent_dalys_by_age_default.keys())

    print("\nIMPROVED AGE GROUP OBJECT:")
    print(type(percent_dalys_by_age_improved))
    print(percent_dalys_by_age_improved.keys())

    fig_11, ax_11 = plot_percent_dalys_averted_by_age_group(
        percent_dalys_by_age_default,
        percent_dalys_by_age_improved,
    )

    # -------------------------------------------------------------------------
    # Save figures
    # -------------------------------------------------------------------------
    # Saving happens before showing so the files are written even if the
    # interactive session is interrupted.
    if args.save_figures:
        figures_to_save = {
            "annual_dalys_across_scenarios": fig_1,
            "percent_dalys_averted_vs_baseline_2027_2034": fig_2,
            "annual_deaths_across_scenarios": fig_3,
            "percent_deaths_averted_vs_baseline_2027_2034": fig_4,
            # Sensitivity-analysis DALY figures
            "annual_dalys_across_scenarios_improved_healthsystem": fig_5,
            "percent_dalys_averted_vs_baseline_2027_2034_improved_healthsystem": fig_6,
            # Sensitivity-analysis death figures
            "annual_deaths_across_scenarios_improved_healthsystem": fig_7,
            "percent_deaths_averted_vs_baseline_2027_2034_improved_healthsystem": fig_8,
            "percent_dalys_averted_by_cause_national_level": fig_9,
            "percent_deaths_averted_by_cause_national_level": fig_10,
            "percent_dalys_averted_by_age_group_national_level": fig_11,
            "percent_deaths_averted_by_age_group_national_level": fig_12,
        }

        for stem, figure in figures_to_save.items():
            figure.savefig(
                results_folder / f"{stem}.pdf",
                bbox_inches="tight",
            )

    # -------------------------------------------------------------------------
    # Show figures
    # -------------------------------------------------------------------------
    if args.show_figures:
        plt.show()
Declare period for which the results will be generated (defined inclusively) -TARGET_PERIOD = (Date(2010, 1, 1), Date(2034, 12, 31)) - - -def drop_outside_period(_df): - """Return a dataframe which only includes for which the date is within the limits defined by TARGET_PERIOD""" - return _df.drop(index=_df.index[~_df['date'].between(*TARGET_PERIOD)]) - - -def figure4_hr_use_overall(results_folder: Path, output_folder: Path, resourcefilepath: Path): - """ 'Figure 4': The level of usage of the HealthSystem HR Resources """ - - make_graph_file_name = lambda stub: output_folder / f"Fig4_{stub}.png" # noqa: E731 - - def get_share_of_time_for_hw_in_each_facility_by_short_treatment_id(_df): - - _df = drop_outside_period(_df) - _df = _df.set_index("date") - - nurse_cols = [ - c for c in _df.columns - if "Officer_Nursing_and_Midwifery" in c - ] - - if len(nurse_cols) == 0: - return None - - nurse_df = _df[nurse_cols] - - # Mean usage across all nurse facilities - nurse_df = nurse_df.copy() - nurse_df.loc[:, "All"] = nurse_df.mean(axis=1) - # nurse_df["All"] = nurse_df.mean(axis=1) - - return nurse_df.resample("M").mean().stack() - - def get_share_of_time_used_for_each_officer_at_each_level(_df): - - _df = drop_outside_period(_df) - _df = _df.set_index("date") - - # Columns look like: - # clinic=GenericClinic|facID_and_officer=FacilityID_0_Officer_Nursing_and_Midwifery - - officer_cols = [ - c for c in _df.columns if "FacilityID_" in c and "Officer_" in c - ] - - if len(officer_cols) == 0: - return None - - officer_df = _df[officer_cols].copy() - # Load Master Facility List - mfl = pd.read_csv( - Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") - ).set_index("Facility_ID") - results = [] - - for col in officer_cols: - - col_string = str(col) - - # Extract facility ID - fac_match = re.search(r'FacilityID_(\d+)', col_string) - if fac_match is None: - continue - fid = int(fac_match.group(1)) - - # Extract cadre - officer_match = 
re.search(r'Officer_(.*)', col_string) - if officer_match is None: - continue - cadre = officer_match.group(1) - - # Get facility level - if fid not in mfl.index: - continue - - level = mfl.loc[fid, "Facility_Level"] - level = "2" if level == "1b" else level - - # Compute mean usage - mean_val = officer_df[col].mean() - - results.append((cadre, level, mean_val)) - - if len(results) == 0: - return None +# ----------------------------------------------------------------------------- +# Rename draw numbers to scenario names +# ----------------------------------------------------------------------------- +def set_param_names_as_column_index_level_0(_df, param_names): - result_df = pd.DataFrame(results, columns=["Cadre", "Facility_Level", "Usage"]) + ordered_param_names = { + i: x for i, x in enumerate(param_names) + } - return result_df.groupby(["Cadre", "Facility_Level"])["Usage"].mean() + names_of_cols_level0 = [ + ordered_param_names.get(col) + for col in _df.columns.levels[0] + ] - capacity_by_facility = summarize( - extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Capacity_By_FacID_and_Officer', - custom_generate_series=get_share_of_time_for_hw_in_each_facility_by_short_treatment_id, - do_scaling=False - ), - only_mean=True, - collapse_columns=True + _df.columns = _df.columns.set_levels( + names_of_cols_level0, + level=0 ) - capacity_by_officer = summarize( - extract_results( - results_folder, - module='tlo.methods.healthsystem.summary', - key='Capacity_By_FacID_and_Officer', - custom_generate_series=get_share_of_time_used_for_each_officer_at_each_level, - do_scaling=False - ), - only_mean=True, - collapse_columns=True - ) + return _df - # Find the levels of each facility - mfl = pd.read_csv( - resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv' - ).set_index('Facility_ID') - - def find_level_for_facility(col_name): - # Skip aggregated column - if col_name == "All": - return None - - 
match = re.search(r'FacilityID_(\d+)', str(col_name)) - - if match is None: - return None - - fid = int(match.group(1)) - - level = mfl.loc[fid, "Facility_Level"] - - return "2" if level == "1b" else level - - # def find_level_for_facility(col_tuple): - # # Extract the text part - # col_string = col_tuple[2] - # - # # Extract facility ID number - # match = re.search(r'FacilityID_(\d+)', col_string) - # fid = int(match.group(1)) - # - # level = mfl.loc[fid, "Facility_Level"] - # return "2" if level == "1b" else level - # def find_level_for_facility(id): - # return mfl.loc[id].Facility_Level if mfl.loc[id].Facility_Level != '1b' else '2' - # def find_level_for_facility(fid): - # level = mfl.loc[fid, "Facility_Level"] - # return "2" if level == "1b" else level - - color_for_level = {'0': 'blue', '1a': 'yellow', '1b': 'green', '2': 'grey', '3': 'orange', '4': 'black', - '5': 'white'} - - fig, ax = plt.subplots() - name_of_plot = 'Usage of Healthcare Worker Time By Month' - capacity_unstacked = capacity_by_facility.unstack() - for i in capacity_unstacked.columns: - - level = find_level_for_facility(i) - - if level is None: - continue - - h1, = ax.plot( - capacity_unstacked[i].index, - capacity_unstacked[i].values, - color=color_for_level[level], - linewidth=0.5, - label=f'Facility_Level {level}' - ) - # for i in capacity_unstacked.columns: - # if i != 'All': - # level = find_level_for_facility(i) - # h1, = ax.plot(capacity_unstacked[i].index, capacity_unstacked[i].values, - # color=color_for_level[level], linewidth=0.5, label=f'Facility_Level {level}') - - if 'All' in capacity_unstacked.columns: - h2, = ax.plot( - capacity_unstacked['All'].index, - capacity_unstacked['All'].values, - color='red', - linewidth=1.5 - ) - ax.legend([h1, h2], ['Each Facility', 'All Facilities']) - else: - ax.legend([h1], ['Each Facility']) - ax.set_title(name_of_plot) - ax.set_xlabel('Month') - ax.set_ylabel('Fraction of all time used\n(Average for the month)') +# 
----------------------------------------------------------------------------- +# Extract annual staffing counts +# ----------------------------------------------------------------------------- +def get_yearly_hr_count(df): - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) - plt.close(fig) - - fig, ax = plt.subplots() - name_of_plot = 'Usage of Healthcare Worker Time (Average)' - capacity_unstacked_average = capacity_by_facility.unstack().mean() - # levels = [find_level_for_facility(i) if i != 'All' else 'All' for i in capacity_unstacked_average.index] - xpos_for_level = dict(zip((color_for_level.keys()), range(len(color_for_level)))) - xpos_for_level.update({'1b': 2, '2': 2, '3': 3, '4': 4, '5': 5}) - for id, val in capacity_unstacked_average.items(): - if id != 'All': - _level = find_level_for_facility(id) - - # Skip if facility level could not be determined - if _level is None: - continue - - if _level != '5': - xpos = xpos_for_level[_level] - scatter = (np.random.rand() - 0.5) * 0.25 - h1, = ax.plot(xpos + scatter, val * 100, color=color_for_level[_level], - marker='.', markersize=15, label='Each Facility', linestyle='none') - if 'All' in capacity_unstacked_average.index: - h2 = ax.axhline( - y=capacity_unstacked_average['All'] * 100, - color='red', - linestyle='--', - label='Average' - ) - ax.set_title(name_of_plot) - ax.set_xlabel('Facility_Level') - ax.set_xticks(list(xpos_for_level.values())) - ax.set_xticklabels(xpos_for_level.keys()) - ax.set_ylabel('Percent of Time Available That is Used\n') - ax.legend(handles=[h1, h2]) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) - plt.close(fig) - - fig, ax = plt.subplots() - name_of_plot = 'Usage of Healthcare Worker Time by Cadre and Facility_Level' - (100.0 * 
capacity_by_officer.unstack()).T.plot.bar(ax=ax) - ax.legend() - ax.set_xlabel('Facility_Level') - ax.set_ylabel('Percent of time that is used') - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - ax.set_title(name_of_plot) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_'))) - plt.close(fig) - - -def get_yearly_hr_count(_df): - - if 'GenericClinic' not in _df.columns: + if 'GenericClinic' not in df.columns: return None - _df['year'] = _df['date'].dt.year + df['year'] = df['date'].dt.year - # Expand facility dictionary - staff_df = _df['GenericClinic'].apply(pd.Series) + # Expand dictionary + staff_df = df['GenericClinic'].apply(pd.Series) - # Extract cadre names - staff_df.columns = [c.split('Officer_')[-1] for c in staff_df.columns] + # Keep cadre names only + staff_df.columns = [ + c.split('Officer_')[-1] + for c in staff_df.columns + ] # Sum facilities within cadre staff_df = staff_df.groupby(level=0, axis=1).sum() # Add year - staff_df['year'] = _df['year'] + staff_df['year'] = df['year'] - # Sum within year + # Annual totals staff_df = staff_df.groupby('year').sum() - POP_SCALE = 145.39609 - # POP_SCALE = 1000 - staff_df = staff_df * POP_SCALE - # Convert to stacked series (year,cadre → value) + # Scale population + # POP_SCALE = 145.39609 + # staff_df = staff_df * POP_SCALE + return staff_df.stack() def extract_staff_counts(results_folder): + return extract_results( results_folder, module="tlo.methods.healthsystem.summary", key="number_of_hcw_staff", custom_generate_series=get_yearly_hr_count, - do_scaling=False + do_scaling=False, ) -def set_param_names_as_column_index_level_0(_df, param_names): - """Set column index level 0 (draw numbers) to scenario names.""" - ordered_param_names = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ - ordered_param_names.get(col) - for col in _df.columns.levels[0] - ] - _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) - 
return _df - +# ----------------------------------------------------------------------------- +# Prepare plotting dataframe +# ----------------------------------------------------------------------------- +def prepare_staffing_totals(summary_df): -def plot_staff_counts_by_cadre_across_scenarios(staff_counts_summary, output_folder): - scenario_names = staff_counts_summary.columns.get_level_values(0).unique() - cadres = staff_counts_summary.index.get_level_values(1).unique() - - for cadre in cadres: + scenarios = ( + summary_df.columns + .get_level_values(0) + .unique() + ) - fig, ax = plt.subplots() + results = {} - for scenario in scenario_names: + for scenario in scenarios: - central = staff_counts_summary[(scenario, "mean")].xs(cadre, level=1) - lower = staff_counts_summary[(scenario, "lower")].xs(cadre, level=1) - upper = staff_counts_summary[(scenario, "upper")].xs(cadre, level=1) + mean_df = summary_df[(scenario, "mean")].unstack() - years = central.index + # Nurses + nurses = mean_df["Nursing_and_Midwifery"] - ax.plot( - years, - central.values, - label=scenario - ) + # Other cadres + other_cadres = mean_df.drop( + columns=["Nursing_and_Midwifery"], + errors="ignore" + ).sum(axis=1) - # ax.fill_between( - # years, - # lower.values, - # upper.values, - # alpha=0.25 - # ) - ax.fill_between( - years, - np.maximum(lower.values, 0), - upper.values, - alpha=0.25 - ) + results[scenario] = pd.DataFrame({ + "Nurses": nurses, + "Other cadres": other_cadres, + }) - ax.set_title(f"{cadre} Staff Counts Across Scenarios") - ax.set_xlabel("Year") - ax.set_ylabel("Number of Health Workers") + return results - ax.legend() - fig.tight_layout() +# ----------------------------------------------------------------------------- +# Plot staffing counts +# ----------------------------------------------------------------------------- +def plot_staffing_counts( + staffing_results, + scenarios, + title, +): - fig.savefig(output_folder / f"{cadre}_staff_counts_across_scenarios.png") + 
fig, ax = plt.subplots(figsize=(10, 6)) - plt.close(fig) + label_map = { + "Baseline Nurses": "Baseline nurses", + "Fewer Nurses": "Fewer nurses", + "More Nurses": "More nurses", + } + # Plot nurse scenarios + for scenario in scenarios: -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): - """Description of the usage of healthcare system resources.""" + df = staffing_results[scenario] - # figure2_appointments_used( - # results_folder=results_folder, output_folder=output_folder, resourcefilepath=resourcefilepath - # ) - log = load_pickled_dataframes(results_folder, 0, 0) - print(log.keys()) + label = None - print(log['tlo.methods.healthsystem.summary'].keys()) + for key in label_map: + if key in scenario: + label = f"Nurses, {label_map[key]}" - # STEP 1: extract staff counts - staff_counts = extract_staff_counts(results_folder) + ax.plot( + df.index, + df["Nurses"], + linewidth=2, + label=label, + ) - # STEP 2: rename draws to scenario names - param_names = tuple(StaffingScenario()._scenarios.keys()) + # Plot other cadres once + other_df = staffing_results[scenarios[0]] - staff_counts = staff_counts.pipe( - set_param_names_as_column_index_level_0, - param_names=param_names + ax.plot( + other_df.index, + other_df["Other cadres"], + linewidth=2.5, + linestyle="--", + color="black", + label="Other cadres total", ) - # STEP 3: summarize runs - print(type(staff_counts)) - print(staff_counts.head()) - staff_counts_summary = summarize(staff_counts) - - print("\n=== Staff counts from 2025–2034 ===") + ax.set_xlabel("Year") + ax.set_ylabel("Annual staff count") - # Select years 2025–2034 - years_to_check = range(2025, 2035) + ax.set_title(title) - export_df = staff_counts_summary.reset_index() + ax.legend() - # Filter the years - export_df = export_df[export_df["year"].isin(years_to_check)] + ax.grid(alpha=0.3) - # Save to Excel - export_path = output_folder / "debug_staff_counts_2025_2034.xlsx" - export_df.to_excel(export_path) - - 
print(f"Staff counts exported to: {export_path}") + fig.tight_layout() - # STEP 4: plot - plot_staff_counts_by_cadre_across_scenarios(staff_counts_summary, output_folder) - - figure4_hr_use_overall( - results_folder=results_folder, output_folder=output_folder, resourcefilepath=resourcefilepath - ) + return fig, ax +# ----------------------------------------------------------------------------- +# Main +# ----------------------------------------------------------------------------- if __name__ == "__main__": + parser = argparse.ArgumentParser() parser.add_argument( "--scenario-outputs-folder", type=Path, required=True, - help="Path to folder containing scenario outputs", ) + parser.add_argument( "--show-figures", action="store_true", - help="Whether to interactively show figures", ) + parser.add_argument( "--save-figures", action="store_true", - help="Whether to save figures", ) + args = parser.parse_args() - # Use the command-line argument instead of hardcoded path results_folder = args.scenario_outputs_folder - # results_folder = Path( - # './outputs/wamulwafu@kuhes.ac.mw/nurses_scenario_outputs-2026-02-09T110530Z' - # ) - - apply( - results_folder=results_folder, # or directly: args.scenario_outputs_folder - output_folder=results_folder, - resourcefilepath=Path('./resources') + + # Scenario names + param_names = tuple( + StaffingScenario()._scenarios.keys() + ) + + # Extract + staff_counts = extract_staff_counts( + results_folder + ).pipe( + set_param_names_as_column_index_level_0, + param_names=param_names, + ) + + # Summarize + summarized_staff_counts = summarize( + staff_counts ) + + # Prepare totals + staffing_results = prepare_staffing_totals( + summarized_staff_counts + ) + + # Scenario groups + default_hs_scenarios = [ + "Baseline Nurses / Default Healthsystem Function", + "Fewer Nurses / Default Healthsystem Function", + "More Nurses / Default Healthsystem Function", + ] + + improved_hs_scenarios = [ + "Baseline Nurses / Improved Healthsystem Function", + 
"Fewer Nurses / Improved Healthsystem Function", + "More Nurses / Improved Healthsystem Function", + ] + + # Plot default HS + fig1, ax1 = plot_staffing_counts( + staffing_results, + default_hs_scenarios, + title="Annual staffing count\nDefault Healthsystem", + ) + + # Plot improved HS + fig2, ax2 = plot_staffing_counts( + staffing_results, + improved_hs_scenarios, + title="Annual staffing count\nImproved Healthsystem", + ) + + if args.save_figures: + fig1.savefig( + results_folder / "annual_staffing_default_hs.pdf", + bbox_inches="tight" + ) + + fig2.savefig( + results_folder / "annual_staffing_improved_hs.pdf", + bbox_inches="tight" + ) + + if args.show_figures: + plt.show() From 5987c0a8e04d1e397200b8476639cf0fdc236266 Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 9 Jun 2026 16:36:20 +0200 Subject: [PATCH 24/52] dalys and deaths by causes top 10 --- .../analysis_nurses_scenario_dalys.py | 2002 ++++++----------- 1 file changed, 640 insertions(+), 1362 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py index ba22c44da6..b1e7b1154e 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py @@ -1,16 +1,7 @@ -"""Plot DALYs across nurse staffing scenarios. +"""Plot DALYs and Deaths across nurse staffing scenarios. -This script produces two figures for the Default Healthsystem Function scenarios only: +This script produces two figures for the Nurse Shortages analysis: -1. Annual DALYs by year (three lines): - - Baseline Nurses / Default Healthsystem Function - - Fewer Nurses / Default Healthsystem Function - - More Nurses / Default Healthsystem Function - -2. 
Percent of DALYs averted compared to Baseline - (total between 2027 and 2034): - - More Nurses - - Fewer Nurses """ import argparse @@ -21,16 +12,48 @@ import pandas as pd from scripts.nurses_analyses.nurses_scenario_analyses import StaffingScenario -from tlo.analysis.utils import ( - extract_results, - load_pickled_dataframes, - summarize, -) +from tlo.analysis.utils import extract_results, load_pickled_dataframes, summarize + + +def find_difference_relative_to_comparison_series( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, +): + return ( + _ser + .unstack(level=0) + .apply( + lambda x: ( + (x - x[comparison]) / + (x[comparison] if scaled else 1.0) + ), + axis=1, + ) + .drop( + columns=([comparison] if drop_comparison else []) + ) + .stack() + ) + + +def find_difference_relative_to_comparison_series_dataframe( + _df: pd.DataFrame, + **kwargs, +): + return pd.concat( + { + idx: find_difference_relative_to_comparison_series( + row, + **kwargs, + ) + for idx, row in _df.iterrows() + }, + axis=1, + ).T -# ----------------------------------------------------------------------------- -# Helper function: rename draw numbers to scenario names -# ----------------------------------------------------------------------------- def set_param_names_as_column_index_level_0(_df, param_names): """Set column index level 0 (draw numbers) to scenario names.""" ordered_param_names = {i: x for i, x in enumerate(param_names)} @@ -42,9 +65,6 @@ def set_param_names_as_column_index_level_0(_df, param_names): return _df -# ----------------------------------------------------------------------------- -# Extract annual DALYs -# ----------------------------------------------------------------------------- def extract_annual_dalys(results_folder): def get_num_dalys_yearly(df: pd.DataFrame) -> pd.Series: """Return total DALYs for each year.""" @@ -70,13 +90,11 @@ def get_num_dalys_yearly(df: pd.DataFrame) -> pd.Series: def 
extract_annual_deaths(results_folder): def get_num_deaths_yearly(df: pd.DataFrame) -> pd.Series: """Return total deaths for each year.""" - yearly = ( df.assign(year=df["date"].dt.year) .groupby("year")["person_id"] .count() ) - return yearly return extract_results( @@ -88,9 +106,7 @@ def get_num_deaths_yearly(df: pd.DataFrame) -> pd.Series: ) -# ----------------------------------------------------------------------------- -# Plot 1: Annual DALYs over time -# ----------------------------------------------------------------------------- +# Plot: Annual DALYs over time def plot_annual_dalys(summarized_annual_dalys): fig, ax = plt.subplots(figsize=(10, 6)) @@ -163,18 +179,10 @@ def plot_annual_deaths(summarized_annual_deaths): for scenario in scenario_names: years = summarized_annual_deaths.index.astype(int) + means = summarized_annual_deaths[(scenario, "mean")].values + lowers = summarized_annual_deaths[(scenario, "lower")].values - means = summarized_annual_deaths[ - (scenario, "mean") - ].values - - lowers = summarized_annual_deaths[ - (scenario, "lower") - ].values - - uppers = summarized_annual_deaths[ - (scenario, "upper") - ].values + uppers = summarized_annual_deaths[(scenario, "upper")].values ax.plot( years, @@ -192,15 +200,10 @@ def plot_annual_deaths(summarized_annual_deaths): ax.set_xlabel("Year") ax.set_ylabel("Annual deaths") - ax.legend() - ax.grid(alpha=0.3) - ax.set_xlim(2025, 2034) - fig.tight_layout() - return fig, ax @@ -210,21 +213,14 @@ def get_deaths_by_cause(df: pd.DataFrame) -> pd.Series: """ Return deaths by cause aggregated across 2027–2034. 
""" - # Add year df = df.assign(year=df["date"].dt.year) - # Restrict years df = df[df["year"].between(2027, 2034)] - - # CHANGE THIS if your column name differs - cause_col = "cause" - - deaths_by_cause = ( - df.groupby(cause_col)["person_id"] - .count() - ) - + # Changed to "label" in order to capture group causes + # cause_col = "cause" + cause_col = "label" + deaths_by_cause = (df.groupby(cause_col)["person_id"].count()) return deaths_by_cause return extract_results( @@ -237,28 +233,16 @@ def get_deaths_by_cause(df: pd.DataFrame) -> pd.Series: # Extract deaths by age group -# ----------------------------------------------------------------------------- -# ----------------------------------------------------------------------------- -# Extract deaths by age group -# ----------------------------------------------------------------------------- def extract_deaths_by_age_group(results_folder): def get_deaths_by_age_group(df: pd.DataFrame) -> pd.Series: """ Return deaths by age group aggregated across 2027–2034. 
""" - - # --------------------------------------------------------- - # Add year - # --------------------------------------------------------- df = df.assign(year=df["date"].dt.year) - - # Restrict years df = df[df["year"].between(2027, 2034)] - # --------------------------------------------------------- # Create age groups - # --------------------------------------------------------- age_bins = [ 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, @@ -291,15 +275,8 @@ def get_deaths_by_age_group(df: pd.DataFrame) -> pd.Series: labels=age_labels, right=False, ) - - # --------------------------------------------------------- # Aggregate deaths by age group - # --------------------------------------------------------- - deaths_by_age = ( - df.groupby("age_group")["person_id"] - .count() - ) - + deaths_by_age = (df.groupby("age_group")["person_id"].count()) return deaths_by_age return extract_results( @@ -317,26 +294,11 @@ def get_dalys_by_cause(df: pd.DataFrame) -> pd.Series: """ Return DALYs by cause aggregated across 2027–2034. """ - - # Add year df = df.assign(year=df["date"].dt.year) - - # Restrict years df = df[df["year"].between(2027, 2034)] - - # Remove metadata columns - metadata_cols = [ - "date", - "sex", - "age_range", - "year", - ] - - cause_cols = [ - c for c in df.columns - if c not in metadata_cols - ] - + # Removing metadata columns + metadata_cols = ["date", "sex", "age_range", "year",] + cause_cols = [c for c in df.columns if c not in metadata_cols] # Sum DALYs for each cause return df[cause_cols].sum() @@ -349,45 +311,28 @@ def get_dalys_by_cause(df: pd.DataFrame) -> pd.Series: ) -# ----------------------------------------------------------------------------- # Extract DALYs by age group -# ----------------------------------------------------------------------------- def extract_dalys_by_age_group(results_folder): def get_dalys_by_age_group(df: pd.DataFrame) -> pd.Series: """ Return DALYs by age group aggregated across 2027–2034. 
""" - - # Add year df = df.assign(year=df["date"].dt.year) - - # Restrict years df = df[df["year"].between(2027, 2034)] # Metadata columns to exclude - metadata_cols = [ - "date", - "sex", - "age_range", - "year", - ] - + metadata_cols = ["date", "sex", "age_range", "year",] # DALY cause columns - cause_cols = [ - c for c in df.columns - if c not in metadata_cols - ] + cause_cols = [c for c in df.columns if c not in metadata_cols] # Sum DALYs across causes first df["total_dalys"] = df[cause_cols].sum(axis=1) - - # Aggregate by age group + # Aggregating by age group dalys_by_age = ( df.groupby("age_range")["total_dalys"] .sum() ) - return dalys_by_age return extract_results( @@ -399,191 +344,93 @@ def get_dalys_by_age_group(df: pd.DataFrame) -> pd.Series: ) -# ----------------------------------------------------------------------------- -# Plot 2: Percent DALYs averted relative to baseline (2027–2034) -# ----------------------------------------------------------------------------- +# Plot: Percent DALYs averted relative to baseline (2027–2034) def calculate_percent_dalys_averted( - summarized_annual_dalys, + annual_dalys, baseline_scenario, comparison_years=range(2027, 2035), ): """ - Calculate % DALYs averted relative to baseline. - - Returns DataFrame with: - mean - lower - upper + Calculate % DALYs averted using run-to-run differences. 
""" - - years = summarized_annual_dalys.index.astype(int) + years = annual_dalys.index.astype(int) year_mask = np.isin(years, list(comparison_years)) - scenario_names = summarized_annual_dalys.columns.get_level_values(0).unique() + annual_dalys = annual_dalys.loc[year_mask] - results = {} - - # Baseline totals - baseline_mean = ( - summarized_annual_dalys[(baseline_scenario, "mean")] - .values[year_mask] - .sum() + pct_diff = ( + -100.0 + * find_difference_relative_to_comparison_series_dataframe( + annual_dalys, + comparison=baseline_scenario, + scaled=True, + ) ) - baseline_lower = ( - summarized_annual_dalys[(baseline_scenario, "lower")] - .values[year_mask] - .sum() - ) + # sum across years but keep DataFrame structure + pct_diff = pd.DataFrame( + pct_diff.sum(axis=0) + ).T - baseline_upper = ( - summarized_annual_dalys[(baseline_scenario, "upper")] - .values[year_mask] - .sum() + summarized = summarize(pct_diff) + results = {} + + scenario_names = ( + summarized.columns + .get_level_values(0) + .unique() ) for scenario in scenario_names: - - if scenario == baseline_scenario: - continue - - scenario_mean = ( - summarized_annual_dalys[(scenario, "mean")] - .values[year_mask] - .sum() - ) - - scenario_lower = ( - summarized_annual_dalys[(scenario, "lower")] - .values[year_mask] - .sum() - ) - - scenario_upper = ( - summarized_annual_dalys[(scenario, "upper")] - .values[year_mask] - .sum() - ) - - mean_averted = ( - (baseline_mean - scenario_mean) - / baseline_mean - * 100.0 - ) - - lower_averted = ( - (baseline_lower - scenario_upper) - / baseline_lower - * 100.0 - ) - - upper_averted = ( - (baseline_upper - scenario_lower) - / baseline_upper - * 100.0 - ) - results[scenario] = { - "mean": mean_averted, - "lower": lower_averted, - "upper": upper_averted, + "mean": summarized[(scenario, "mean")].iloc[0], + "lower": summarized[(scenario, "lower")].iloc[0], + "upper": summarized[(scenario, "upper")].iloc[0], } return pd.DataFrame(results).T def 
calculate_percent_deaths_averted( - summarized_annual_deaths, + annual_deaths, baseline_scenario, comparison_years=range(2027, 2035), ): - years = summarized_annual_deaths.index.astype(int) - + """ + Calculate % deaths averted using run-to-run differences. + """ + years = annual_deaths.index.astype(int) year_mask = np.isin(years, list(comparison_years)) - - scenario_names = ( - summarized_annual_deaths.columns - .get_level_values(0) - .unique() + annual_deaths = annual_deaths.loc[year_mask] + + pct_diff = ( + -100.0 + * find_difference_relative_to_comparison_series_dataframe( + annual_deaths, + comparison=baseline_scenario, + scaled=True, + ) ) - results = {} - - baseline_mean = ( - summarized_annual_deaths[ - (baseline_scenario, "mean") - ] - .values[year_mask] - .sum() - ) + # sum across years but keep DataFrame structure + pct_diff = pd.DataFrame( + pct_diff.sum(axis=0) + ).T - baseline_lower = ( - summarized_annual_deaths[ - (baseline_scenario, "lower") - ] - .values[year_mask] - .sum() - ) + summarized = summarize(pct_diff) + results = {} - baseline_upper = ( - summarized_annual_deaths[ - (baseline_scenario, "upper") - ] - .values[year_mask] - .sum() + scenario_names = ( + summarized.columns + .get_level_values(0) + .unique() ) for scenario in scenario_names: - - if scenario == baseline_scenario: - continue - - scenario_mean = ( - summarized_annual_deaths[ - (scenario, "mean") - ] - .values[year_mask] - .sum() - ) - - scenario_lower = ( - summarized_annual_deaths[ - (scenario, "lower") - ] - .values[year_mask] - .sum() - ) - - scenario_upper = ( - summarized_annual_deaths[ - (scenario, "upper") - ] - .values[year_mask] - .sum() - ) - - mean_averted = ( - (baseline_mean - scenario_mean) - / baseline_mean - * 100.0 - ) - - lower_averted = ( - (baseline_lower - scenario_upper) - / baseline_lower - * 100.0 - ) - - upper_averted = ( - (baseline_upper - scenario_lower) - / baseline_upper - * 100.0 - ) - results[scenario] = { - "mean": mean_averted, - "lower": 
lower_averted, - "upper": upper_averted, + "mean": summarized[(scenario, "mean")].iloc[0], + "lower": summarized[(scenario, "lower")].iloc[0], + "upper": summarized[(scenario, "upper")].iloc[0], } return pd.DataFrame(results).T @@ -600,32 +447,16 @@ def calculate_percent_dalys_averted_by_cause( fewer_nurses """ - scenario_names = ( - summarized_dalys_by_cause.columns - .get_level_values(0) - .unique() - ) - - baseline = summarized_dalys_by_cause[ - (baseline_scenario, "mean") - ] - + scenario_names = (summarized_dalys_by_cause.columns.get_level_values(0).unique()) + baseline = summarized_dalys_by_cause[(baseline_scenario, "mean")] results = pd.DataFrame(index=baseline.index) for scenario in scenario_names: - if scenario == baseline_scenario: continue - scenario_values = summarized_dalys_by_cause[ - (scenario, "mean") - ] - - percent_averted = ( - (baseline - scenario_values) - / baseline - * 100.0 - ) + scenario_values = summarized_dalys_by_cause[(scenario, "mean")] + percent_averted = ((baseline - scenario_values) / baseline * 100.0) if "More Nurses" in scenario: results["More nurses"] = percent_averted @@ -637,135 +468,91 @@ def calculate_percent_dalys_averted_by_cause( return results -# ----------------------------------------------------------------------------- # Calculate % deaths averted by cause -# ----------------------------------------------------------------------------- def calculate_percent_deaths_averted_by_cause( - summarized_deaths_by_cause, + deaths_by_cause, baseline_scenario, ): - """ - Returns DataFrame indexed by cause with columns: - More nurses - Fewer nurses - """ - scenario_names = ( - summarized_deaths_by_cause.columns - .get_level_values(0) - .unique() + pct_diff = ( + -100.0 + * find_difference_relative_to_comparison_series_dataframe( + deaths_by_cause, + comparison=baseline_scenario, + scaled=True, + ) ) - baseline = summarized_deaths_by_cause[ - (baseline_scenario, "mean") - ] - - results = pd.DataFrame(index=baseline.index) + 
summarized = summarize(pct_diff) + results = {} + scenario_names = (summarized.columns.get_level_values(0).unique()) for scenario in scenario_names: - if scenario == baseline_scenario: - continue - - scenario_values = summarized_deaths_by_cause[ - (scenario, "mean") - ] - - percent_averted = ( - (baseline - scenario_values) - / baseline - * 100.0 - ) - if "More Nurses" in scenario: - results["More nurses"] = percent_averted - - elif "Fewer Nurses" in scenario: - results["Fewer nurses"] = -percent_averted + results[scenario] = pd.DataFrame({ + "mean": summarized[(scenario, "mean")], + "lower": summarized[(scenario, "lower")], + "upper": summarized[(scenario, "upper")], + }) return results -# ----------------------------------------------------------------------------- -# Calculate % DALYs averted by age group -# ----------------------------------------------------------------------------- -def calculate_percent_dalys_averted_by_age_group( - summarized_dalys_by_age, +def calculate_percent_dalys_averted_by_cause( + dalys_by_cause, baseline_scenario, ): - scenario_names = ( - summarized_dalys_by_age.columns - .get_level_values(0) - .unique() + pct_diff = ( + -100.0 + * find_difference_relative_to_comparison_series_dataframe( + dalys_by_cause, + comparison=baseline_scenario, + scaled=True, + ) ) + summarized = summarize(pct_diff) results = {} - - baseline_mean = summarized_dalys_by_age[ - (baseline_scenario, "mean") - ] - - baseline_lower = summarized_dalys_by_age[ - (baseline_scenario, "lower") - ] - - baseline_upper = summarized_dalys_by_age[ - (baseline_scenario, "upper") - ] + scenario_names = (summarized.columns.get_level_values(0).unique()) for scenario in scenario_names: + results[scenario] = pd.DataFrame({ + "mean": summarized[(scenario, "mean")], + "lower": summarized[(scenario, "lower")], + "upper": summarized[(scenario, "upper")], + }) - if scenario == baseline_scenario: - continue - - scenario_mean = summarized_dalys_by_age[ - (scenario, "mean") - ] - - 
scenario_lower = summarized_dalys_by_age[ - (scenario, "lower") - ] - - scenario_upper = summarized_dalys_by_age[ - (scenario, "upper") - ] + return results - mean_averted = ( - (baseline_mean - scenario_mean) - / baseline_mean - * 100.0 - ) - lower_averted = ( - (baseline_lower - scenario_upper) - / baseline_lower - * 100.0 - ) +# Calculate % DALYs averted by age group +def calculate_percent_dalys_averted_by_age_group( + dalys_by_age_group, + baseline_scenario, +): + """ + Run-level comparison first, + then summarize. + """ - upper_averted = ( - (baseline_upper - scenario_lower) - / baseline_upper - * 100.0 + pct_diff = ( + -100.0 + * find_difference_relative_to_comparison_series_dataframe( + dalys_by_age_group, + comparison=baseline_scenario, + scaled=True, ) + ) - print(mean_averted.describe()) - - print("\n", scenario) - print("mean:") - print(mean_averted.head()) - - if "Fewer Nurses" in scenario: - positive_values = mean_averted[mean_averted > 0] - - print("\nPOSITIVE VALUES IN FEWER NURSES:") - print(positive_values) - - print("\nNUMBER OF POSITIVE AGE GROUPS:") - print(len(positive_values)) + summarized = summarize(pct_diff) + results = {} + scenario_names = (summarized.columns.get_level_values(0).unique()) + for scenario in scenario_names: results[scenario] = pd.DataFrame({ - "mean": mean_averted, - "lower": lower_averted, - "upper": upper_averted, + "mean": summarized[(scenario, "mean")], + "lower": summarized[(scenario, "lower")], + "upper": summarized[(scenario, "upper")], }) return results @@ -773,690 +560,271 @@ def calculate_percent_dalys_averted_by_age_group( # Calculate % deaths averted by age group def calculate_percent_deaths_averted_by_age_group( - summarized_deaths_by_age, + deaths_by_age_group, baseline_scenario, ): - """ - Returns DataFrame indexed by age group with columns: - More nurses - Fewer nurses - """ - scenario_names = ( - summarized_deaths_by_age.columns - .get_level_values(0) - .unique() + pct_diff = ( + -100.0 + * 
find_difference_relative_to_comparison_series_dataframe( + deaths_by_age_group, + comparison=baseline_scenario, + scaled=True, + ) ) - baseline = summarized_deaths_by_age[ - (baseline_scenario, "mean") - ] - - results = pd.DataFrame(index=baseline.index) + summarized = summarize(pct_diff) + results = {} + scenario_names = (summarized.columns.get_level_values(0).unique()) for scenario in scenario_names: - - if scenario == baseline_scenario: - continue - - scenario_values = summarized_deaths_by_age[ - (scenario, "mean") - ] - - percent_averted = np.where( - baseline > 0, - (baseline - scenario_values) - / baseline - * 100.0, - np.nan, - ) - - if "More Nurses" in scenario: - results["More nurses"] = percent_averted - - elif "Fewer Nurses" in scenario: - results["Fewer nurses"] = -percent_averted + results[scenario] = pd.DataFrame({ + "mean": summarized[(scenario, "mean")], + "lower": summarized[(scenario, "lower")], + "upper": summarized[(scenario, "upper")], + }) return results -# ----------------------------------------------------------------------------- -# District-level plot: % DALYs averted compared to baseline (2027–2034) -# ----------------------------------------------------------------------------- - -# def extract_annual_dalys_by_district(results_folder): -# """ -# Extract annual DALYs by district. -# -# This uses the same facility-to-district mapping approach -# that worked for staff counts. 
-# """ -# -# def get_dalys_by_district(df: pd.DataFrame) -> pd.Series: -# """Return total DALYs for each year and district.""" -# -# # Check if we have the right data structure -# if 'date' not in df.columns: -# return pd.Series(dtype=float) -# -# # Extract year -# years = df['date'].dt.year.rename("year") -# -# # Identify district column - for DALYs, district might not be directly available -# # Instead, we need to aggregate from facility-level data if available -# -# # For now, if no district column, return national-level with "National" as district -# if 'district' not in df.columns and 'District' not in df.columns: -# # Sum all DALY causes -# daly_cols = [c for c in df.columns if c not in ['date', 'year', 'sex', 'age_range', 'li_wealth']] -# yearly_total = df.groupby(years)[daly_cols].sum().sum(axis=1) -# -# # Create Series with (year, "National") index -# result = pd.Series( -# yearly_total.values, -# index=pd.MultiIndex.from_arrays([yearly_total.index, ["National"] * len(yearly_total)], -# names=["year", "District"]) -# ) -# return result -# -# # If district column exists, use it -# district_col = 'district' if 'district' in df.columns else 'District' -# daly_cols = [c for c in df.columns if c not in ['date', 'year', 'sex', 'age_range', 'li_wealth', district_col]] -# -# # Group by year and district -# grouped = df.groupby([years, district_col])[daly_cols].sum().sum(axis=1) -# grouped.index = grouped.index.set_names(["year", "District"]) -# -# return grouped.astype(float) -# -# return extract_results( -# results_folder, -# module="tlo.methods.healthburden", -# key="dalys_stacked", # Try this key instead -# custom_generate_series=get_dalys_by_district, -# do_scaling=True, -# ) - - -def find_facility_level_data(results_folder): - """Inspect HealthBurden outputs properly.""" - - from tlo.analysis.utils import load_pickled_dataframes - - log = load_pickled_dataframes(results_folder) - - print("\n" + "=" * 60) - print("Inspecting HealthBurden outputs...") - 
print("=" * 60) - - healthburden_data = log.get("tlo.methods.healthburden", {}) - - for key_name, obj in healthburden_data.items(): - print(f"\nKEY: {key_name}") - print("-" * 50) +def plot_percent_dalys_averted_comparison(default_df, improved_df,): + fig, axes = plt.subplots(ncols=2, figsize=(12, 6), sharey=True,) - try: - print(f"TYPE: {type(obj)}") - - # If DataFrame directly - if isinstance(obj, pd.DataFrame): - print("DataFrame detected") - print("Columns:") - print(obj.columns.tolist()) - print("\nHEAD:") - print(obj.head()) - continue - - # If dict-like - if isinstance(obj, dict): - - print(f"DICT KEYS: {list(obj.keys())[:5]}") - - first_key = list(obj.keys())[0] - - first_obj = obj[first_key] - - print(f"FIRST OBJECT TYPE: {type(first_obj)}") - - if isinstance(first_obj, pd.DataFrame): - print("Columns:") - print(first_obj.columns.tolist()) - - print("\nHEAD:") - print(first_obj.head()) - - else: - print(first_obj) - - continue - - print(obj) - - except Exception as e: - print(f"ERROR: {e}") - - -def check_all_dalys_columns(results_folder): - """Check every DALY-related key for any facility/district columns""" - from tlo.analysis.utils import load_pickled_dataframes - - log = load_pickled_dataframes(results_folder) - healthburden = log['tlo.methods.healthburden'] - - daly_keys = ['dalys', 'dalys_stacked', 'dalys_stacked_by_age_and_time', - 'dalys_by_wealth_stacked_by_age_and_time'] - - facility_keywords = ['facility', 'district', 'Facility', 'District', - 'facility_id', 'Facility_ID', 'clinic', 'Clinic'] - - for key in daly_keys: - if key not in healthburden: - continue - - print(f"\n{'=' * 50}") - print(f"Checking: {key}") - print('=' * 50) - - sample = healthburden[key][0] - all_columns = sample.columns.tolist() - - print(f"Total columns: {len(all_columns)}") - print(f"Sample columns: {all_columns[:15]}...") - - # Check for facility/district columns - found = [] - for col in all_columns: - for kw in facility_keywords: - if kw.lower() in col.lower(): - 
found.append(col) - - if found: - print(f"\n✓ FOUND facility/district columns: {found}") - else: - print("\n❌ No facility or district columns found") - - -def check_death_columns(results_folder): - """Inspect death log columns for district/location information.""" - - from tlo.analysis.utils import load_pickled_dataframes - - log = load_pickled_dataframes(results_folder) - - death_log = log["tlo.methods.demography"]["death"] - - print("\n" + "=" * 60) - print("CHECKING DEATH LOG COLUMNS") - print("=" * 60) - - # Handle dict structure - if isinstance(death_log, dict): - first_key = list(death_log.keys())[0] - sample = death_log[first_key] - else: - sample = death_log - - print(sample.columns.tolist()) - - # Search for district-like columns - keywords = [ - "district", - "District", - "facility", - "Facility", - "region", - "location", + panel_data = [ + (axes[0], default_df, "Default Healthsystem"), + (axes[1], improved_df, "Improved Healthsystem"), ] - found = [] - - for col in sample.columns: - for kw in keywords: - if kw.lower() in col.lower(): - found.append(col) - - print("\nPossible district/location columns:") - print(found) - print("\nPossible death columns:") - print(sample.columns.tolist()) - - -def inspect_population_log(results_folder): - from tlo.analysis.utils import load_pickled_dataframes - - log = load_pickled_dataframes(results_folder) - - demography = log["tlo.methods.demography"] - - print("\n" + "=" * 60) - print("INSPECTING POPULATION LOG") - print("=" * 60) - - population_obj = demography["population"] - - print(f"\nTYPE: {type(population_obj)}") + for ax, df, title in panel_data: + ordered_scenarios = [ + s for s in df.index + if "More Nurses" in s + ] + [ + s for s in df.index + if "Fewer Nurses" in s + ] - print("\nDICT KEYS:") - print(population_obj.keys()) + labels = ["More nurses" if "More Nurses" in s else "Fewer nurses" for s in ordered_scenarios] - # Take first run - first_run_key = list(population_obj.keys())[0] + means = 
df.loc[ordered_scenarios, "mean"].values + lowers = df.loc[ordered_scenarios, "lower"].values + uppers = df.loc[ordered_scenarios, "upper"].values - print(f"\nFIRST RUN KEY: {first_run_key}") + yerr = np.vstack([ + means - lowers, + uppers - means, + ]) - pop_df = population_obj[first_run_key] + colors = ["steelblue" if "More Nurses" in s else "indianred" for s in ordered_scenarios] - print(f"\nOBJECT TYPE: {type(pop_df)}") + ax.bar(labels, means, yerr=yerr, capsize=6, color=colors, width=0.55,) + ax.axhline(0, color="black", linewidth=1,) + ax.set_title(title) + ax.grid(axis="y",alpha=0.3,) - if isinstance(pop_df, pd.DataFrame): - print("\nCOLUMNS:") - print(pop_df.columns.tolist()) + axes[0].set_ylabel( + "% DALYs averted compared to Baseline\n" + "(total between 2027 and 2034)" + ) - print("\nHEAD:") - print(pop_df.head()) + fig.suptitle( + "% DALYs averted relative to baseline (2027–2034)", + fontsize=14, + ) + fig.tight_layout() + return fig, axes - print("\nPOSSIBLE LOCATION COLUMNS:") - location_cols = [ - c for c in pop_df.columns - if any( - kw in c.lower() - for kw in [ - "district", - "region", - "facility", - "location", - "residence" - ] - ) - ] +def plot_percent_deaths_averted_comparison(default_df,improved_df,): + fig, axes = plt.subplots(ncols=2, figsize=(12, 6), sharey=True,) - print(location_cols) - - -# def calculate_percent_dalys_averted_by_district( -# summarized_annual_dalys_by_district, -# baseline_scenario, -# comparison_years=range(2027, 2035), -# ): -# """ -# Calculate % DALYs averted by district relative to baseline. -# -# Returns a DataFrame: -# index = District -# columns = scenarios (excluding baseline) -# -# Positive values = DALYs averted -# Negative values = additional DALYs. -# -# This function is robust to whether the summarized dataframe index is: -# 1. A MultiIndex: (year, district) -# 2. A single Index of tuples: [(year, district), ...] 
-# """ -# -# # --------------------------------------------------------------------- -# # Reconstruct a proper MultiIndex if summarize() collapsed it into -# # a single-level Index containing tuples like (year, district) -# # --------------------------------------------------------------------- -# if not isinstance( -# summarized_annual_dalys_by_district.index, -# pd.MultiIndex -# ): -# first_value = summarized_annual_dalys_by_district.index[0] -# -# # If index entries are tuples of length 2, rebuild MultiIndex -# if isinstance(first_value, tuple) and len(first_value) == 2: -# summarized_annual_dalys_by_district = ( -# summarized_annual_dalys_by_district.copy() -# ) -# -# summarized_annual_dalys_by_district.index = pd.MultiIndex.from_tuples( -# summarized_annual_dalys_by_district.index, -# names=["year", "District"], -# ) -# else: -# raise ValueError( -# "District-level DALY data does not have a " -# "(year, district) index structure." -# ) -# -# # --------------------------------------------------------------------- -# # At this point we are guaranteed to have a MultiIndex: -# # level 0 = year -# # level 1 = district -# # --------------------------------------------------------------------- -# districts = ( -# summarized_annual_dalys_by_district.index -# .get_level_values(1) -# .unique() -# ) -# -# scenario_names = ( -# summarized_annual_dalys_by_district.columns -# .get_level_values(0) -# .unique() -# ) -# -# comparison_scenarios = [ -# s for s in scenario_names -# if s != baseline_scenario -# ] -# -# # Results DataFrame -# results = pd.DataFrame( -# index=districts, -# columns=comparison_scenarios, -# dtype=float, -# ) -# -# # --------------------------------------------------------------------- -# # Compute % DALYs averted for each district -# # --------------------------------------------------------------------- -# for district in districts: -# -# # Select all years for this district -# district_df = summarized_annual_dalys_by_district.xs( -# district, -# 
level=1 -# ) -# -# # Keep only years in comparison period -# district_df = district_df.loc[ -# district_df.index.isin(comparison_years) -# ] -# -# # Skip if no data -# if district_df.empty: -# continue -# -# # Baseline DALYs total -# baseline_total = district_df[ -# (baseline_scenario, "mean") -# ].sum() -# -# # Avoid divide by zero -# if baseline_total == 0: -# continue -# -# # Comparison scenarios -# for scenario in comparison_scenarios: -# scenario_total = district_df[ -# (scenario, "mean") -# ].sum() -# -# percent_averted = ( -# (baseline_total - scenario_total) -# / baseline_total -# * 100.0 -# ) -# -# results.loc[district, scenario] = percent_averted -# -# # Remove districts with all missing values -# results = results.dropna(how="all") -# -# # Sort alphabetically -# results = results.sort_index() -# -# return results - - -# def plot_percent_dalys_averted_by_district(percent_averted_by_district): -# """ -# Create horizontal district-level bar chart. -# Bars to the right = DALYs averted (positive) -# Bars to the left = Additional DALYs (negative) -# """ -# -# # Desired scenario order and labels -# scenario_order = [ -# "Fewer Nurses / Default Healthsystem Function", -# "More Nurses / Default Healthsystem Function", -# ] -# -# scenario_order = [ -# s for s in scenario_order -# if s in percent_averted_by_district.columns -# ] -# -# label_map = { -# "Fewer Nurses / Default Healthsystem Function": "Fewer nurses", -# "More Nurses / Default Healthsystem Function": "More nurses", -# } -# -# districts = percent_averted_by_district.index.tolist() -# y = np.arange(len(districts)) -# -# fig_height = max(6, len(districts) * 0.35) -# fig, ax = plt.subplots(figsize=(8, fig_height)) -# -# bar_height = 0.35 -# offsets = np.linspace( -# -bar_height / 2, -# bar_height / 2, -# len(scenario_order) -# ) -# -# for offset, scenario in zip(offsets, scenario_order): -# values = ( -# percent_averted_by_district[scenario] -# .fillna(0) -# .values -# ) -# -# ax.barh( -# y + offset, -# 
values, -# height=bar_height, -# label=label_map.get(scenario, scenario), -# alpha=0.8, -# ) -# -# # Zero reference line -# ax.axvline(0, color="black", linewidth=1) -# -# # Y-axis -# ax.set_yticks(y) -# ax.set_yticklabels(districts) -# -# # Labels -# ax.set_ylabel("District") -# ax.set_xlabel( -# "% DALYs averted\n" -# "(total 2027–2034)\n" -# "compared to Baseline" -# ) -# -# # Match sketch style: first district at top -# ax.invert_yaxis() -# # Legend -# ax.legend() -# # Light grid -# ax.grid(axis="x", alpha=0.3) -# fig.tight_layout() -# -# return fig, ax - - -def plot_percent_dalys_averted(percent_averted): - fig, ax = plt.subplots(figsize=(7, 6)) - - # Keep desired ordering dynamically - ordered_scenarios = [ - s for s in percent_averted.index - if "More Nurses" in s - ] + [ - s for s in percent_averted.index - if "Fewer Nurses" in s - ] - - labels = [ - "More nurses" if "More Nurses" in s else "Fewer nurses" - for s in ordered_scenarios + panel_data = [ + (axes[0], default_df, "Default Healthsystem"), + (axes[1], improved_df, "Improved Healthsystem"), ] - means = percent_averted.loc[ - ordered_scenarios, "mean" - ].values + for ax, df, title in panel_data: + ordered_scenarios = [ + s for s in df.index + if "More Nurses" in s + ] + [ + s for s in df.index + if "Fewer Nurses" in s + ] - lowers = percent_averted.loc[ - ordered_scenarios, "lower" - ].values + labels = ["More nurses" if "More Nurses" in s else "Fewer nurses" for s in ordered_scenarios] - uppers = percent_averted.loc[ - ordered_scenarios, "upper" - ].values + means = df.loc[ordered_scenarios, "mean"].values + lowers = df.loc[ordered_scenarios, "lower"].values + uppers = df.loc[ordered_scenarios, "upper"].values - yerr = np.vstack([ - means - lowers, - uppers - means, - ]) + yerr = np.vstack([ + means - lowers, + uppers - means, + ]) - ax.bar( - labels, - means, - width=0.45, - yerr=yerr, - capsize=6, - ) + colors = ["steelblue" if "More Nurses" in s else "indianred" for s in ordered_scenarios] - 
ax.axhline(0, color="black", linewidth=1) + ax.bar(labels, means, yerr=yerr, capsize=6, color=colors, width=0.55,) + ax.axhline(0, color="black", linewidth=1,) + ax.set_title(title) + ax.grid(axis="y", alpha=0.3,) - ax.set_ylabel( - "% DALYs averted compared to Baseline\n" + axes[0].set_ylabel( + "% deaths averted compared to Baseline\n" "(total between 2027 and 2034)" ) - ax.grid(axis="y", alpha=0.3) - + fig.suptitle( + "% deaths averted relative to baseline (2027–2034)", + fontsize=14, + ) fig.tight_layout() + return fig, axes - return fig, ax - - -def plot_percent_deaths_averted(percent_averted): - fig, ax = plt.subplots(figsize=(7, 6)) - ordered_scenarios = [ - s for s in percent_averted.index - if "More Nurses" in s - ] + [ - s for s in percent_averted.index - if "Fewer Nurses" in s - ] +# Plot % DALYs averted by cause +def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): - labels = [ - "More nurses" if "More Nurses" in s else "Fewer nurses" - for s in ordered_scenarios + # Extracting scenario dataframes + default_more = default_df[ + "More Nurses / Default Healthsystem Function" ] - means = percent_averted.loc[ - ordered_scenarios, - "mean" - ].values + default_fewer = default_df[ + "Fewer Nurses / Default Healthsystem Function" + ] - lowers = percent_averted.loc[ - ordered_scenarios, - "lower" - ].values + improved_more = improved_df[ + "More Nurses / Improved Healthsystem Function" + ] - uppers = percent_averted.loc[ - ordered_scenarios, - "upper" - ].values + improved_fewer = improved_df[ + "Fewer Nurses / Improved Healthsystem Function" + ] - yerr = np.vstack([ - means - lowers, - uppers - means, - ]) + # Using sum + # total_dalys = ( + # dalys_by_cause + # .xs(baseline_scenario, level="draw", axis=1) + # .sum(axis=1) + # .sort_values(ascending=False) + # ) + # + # top_causes = total_dalys.head(10).index.tolist() + # + # default_more = default_more.loc[top_causes] + # default_fewer = default_fewer.loc[top_causes] + # + # 
improved_more = improved_more.loc[top_causes] + # improved_fewer = improved_fewer.loc[top_causes] + # + # # Reverse so largest appears at top + # default_more = default_more.iloc[::-1] + # default_fewer = default_fewer.iloc[::-1] + # + # improved_more = improved_more.iloc[::-1] + # improved_fewer = improved_fewer.iloc[::-1] - ax.bar( - labels, - means, - width=0.45, - yerr=yerr, - capsize=6, + # Top causes for DEFAULT healthsystem + default_top = ( + default_more["mean"] + .abs() + .sort_values(ascending=False) + .head(top_n) + .index ) - ax.axhline(0, color="black", linewidth=1) - - ax.set_ylabel( - "% deaths averted compared to Baseline\n" - "(total between 2027 and 2034)" + default_more = ( + default_more.loc[default_top] + .sort_values("mean", ascending=True) ) - ax.grid(axis="y", alpha=0.3) - - fig.tight_layout() - - return fig, ax - + default_fewer = ( + default_fewer.loc[default_top] + .reindex(default_more.index) + ) -# Plot % DALYs averted by cause -def plot_percent_dalys_averted_by_cause( - default_df, - improved_df, - top_n=10, -): - # --------------------------------------------------------- - # Select top causes based on absolute impact - # --------------------------------------------------------- - ranking = ( - default_df["More nurses"].abs() + # Top causes for IMPROVED healthsystem + improved_top = ( + improved_more["mean"] + .abs() .sort_values(ascending=False) + .head(top_n) + .index ) - top_causes = ranking.head(top_n).index.tolist() - - default_df = default_df.loc[top_causes] - improved_df = improved_df.loc[top_causes] - - # Reverse so largest appears at top - default_df = default_df.iloc[::-1] - improved_df = improved_df.iloc[::-1] + improved_more = ( + improved_more.loc[improved_top] + .sort_values("mean", ascending=True) + ) - # --------------------------------------------------------- - # Create subplots - # --------------------------------------------------------- - fig, axes = plt.subplots( - ncols=2, - figsize=(14, 8), - sharey=True, + 
improved_fewer = ( + improved_fewer.loc[improved_top] + .reindex(improved_more.index) ) + # Plot + fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=False,) + panel_data = [ - (axes[0], default_df, "Default Healthsystem"), - (axes[1], improved_df, "Improved Healthsystem"), + ( + axes[0], + default_more, + default_fewer, + "Default Healthsystem", + ), + ( + axes[1], + improved_more, + improved_fewer, + "Improved Healthsystem", + ), ] - for ax, df, title in panel_data: - y = np.arange(len(df)) - - # More nurses (positive) - ax.barh( - y, - df["More nurses"], - color="lightsteelblue", - label="More nurses", + for ax, more, fewer, title in panel_data: + y = np.arange(len(more)) + ax.barh(y - 0.2, more["mean"], height=0.35, color="steelblue", label="More nurses",) + + ax.barh(y + 0.2, fewer["mean"], height=0.35, color="indianred", label="Fewer nurses",) + + # CI bars: More nurses + ax.errorbar( + more["mean"], + y - 0.2, + xerr=[ + more["mean"] - more["lower"], + more["upper"] - more["mean"], + ], + fmt="none", + capsize=4, + color="black", ) - # Fewer nurses (negative) - ax.barh( - y, - df["Fewer nurses"], - color="lightsteelblue", - label="Fewer nurses", + # CI bars: Fewer nurses + ax.errorbar( + fewer["mean"], + y + 0.2, + xerr=[ + fewer["mean"] - fewer["lower"], + fewer["upper"] - fewer["mean"], + ], + fmt="none", + capsize=4, + color="black", ) - # Zero line ax.axvline(0, color="black", linewidth=1) - # Cause labels ax.set_yticks(y) - ax.set_yticklabels(df.index) + ax.set_yticklabels(more.index) ax.set_xlabel("% DALYs averted") ax.set_title(title) ax.grid(axis="x", alpha=0.3) + handles, labels = axes[0].get_legend_handles_labels() + + fig.legend(handles, labels, loc="lower center", ncol=2, bbox_to_anchor=(0.5, -0.02),) + fig.suptitle( "% DALYs averted by causes on national level\n(2027–2034)" ) @@ -1465,85 +833,131 @@ def plot_percent_dalys_averted_by_cause( # Plot % deaths averted by cause -def plot_percent_deaths_averted_by_cause( - default_df, - 
improved_df, - top_n=10, -): - # --------------------------------------------------------- - # Select top causes - # --------------------------------------------------------- - ranking = ( - default_df["More nurses"].abs() +def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=10,): + + # Extracting scenario dataframes + default_more = default_df[ + "More Nurses / Default Healthsystem Function" + ] + + default_fewer = default_df[ + "Fewer Nurses / Default Healthsystem Function" + ] + + improved_more = improved_df[ + "More Nurses / Improved Healthsystem Function" + ] + + improved_fewer = improved_df[ + "Fewer Nurses / Improved Healthsystem Function" + ] + + # Top causes for DEFAULT healthsystem + default_top = ( + default_more["mean"] + .abs() .sort_values(ascending=False) + .head(top_n) + .index + ) + + default_more = ( + default_more.loc[default_top] + .sort_values("mean", ascending=True) ) - top_causes = ranking.head(top_n).index.tolist() + default_fewer = ( + default_fewer.loc[default_top] + .reindex(default_more.index) + ) - default_df = default_df.loc[top_causes] - improved_df = improved_df.loc[top_causes] + # Top causes for IMPROVED healthsystem + improved_top = ( + improved_more["mean"] + .abs() + .sort_values(ascending=False) + .head(top_n) + .index + ) - # Reverse so largest appears at top - default_df = default_df.iloc[::-1] - improved_df = improved_df.iloc[::-1] + improved_more = ( + improved_more.loc[improved_top] + .sort_values("mean", ascending=True) + ) - # --------------------------------------------------------- - # Create subplots - # --------------------------------------------------------- - fig, axes = plt.subplots( - ncols=2, - figsize=(14, 8), - sharey=True, + improved_fewer = ( + improved_fewer.loc[improved_top] + .reindex(improved_more.index) ) + # Plot + fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=False,) + panel_data = [ - (axes[0], default_df, "Default Healthsystem"), - (axes[1], improved_df, "Improved 
Healthsystem"), + ( + axes[0], + default_more, + default_fewer, + "Default Healthsystem", + ), + ( + axes[1], + improved_more, + improved_fewer, + "Improved Healthsystem", + ), ] - for ax, df, title in panel_data: - y = np.arange(len(df)) + for ax, more, fewer, title in panel_data: + y = np.arange(len(more)) - ax.barh( - y, - df["More nurses"], - color="lightcoral", - label="More nurses", + ax.barh(y - 0.2, more["mean"], height=0.35, color="steelblue", label="More nurses",) + ax.barh(y + 0.2, fewer["mean"], height=0.35, color="indianred", label="Fewer nurses",) + + ax.errorbar( + more["mean"], + y - 0.2, + xerr=[ + more["mean"] - more["lower"], + more["upper"] - more["mean"], + ], + fmt="none", + capsize=4, + color="black", ) - ax.barh( - y, - df["Fewer nurses"], - color="lightcoral", - label="Fewer nurses", + ax.errorbar( + fewer["mean"], + y + 0.2, + xerr=[ + fewer["mean"] - fewer["lower"], + fewer["upper"] - fewer["mean"], + ], + fmt="none", + capsize=4, + color="black", ) ax.axvline(0, color="black", linewidth=1) ax.set_yticks(y) - ax.set_yticklabels(df.index) + ax.set_yticklabels(more.index) ax.set_xlabel("% deaths averted") ax.set_title(title) ax.grid(axis="x", alpha=0.3) + handles, labels = axes[0].get_legend_handles_labels() + + fig.legend(handles, labels, loc="lower center", ncol=2, bbox_to_anchor=(0.5, -0.02),) fig.suptitle( "% deaths averted by causes on national level\n(2027–2034)" ) - fig.tight_layout() - return fig, axes - # Plot % DALYs averted by age group -# ----------------------------------------------------------------------------- -def plot_percent_dalys_averted_by_age_group( - default_df, - improved_df, -): - # --------------------------------------------------------- - # Extract scenario DataFrames from dictionaries - # --------------------------------------------------------- +def plot_percent_dalys_averted_by_age_group(default_df,improved_df,): + default_more = default_df[ "More Nurses / Default Healthsystem Function" ] @@ -1560,9 +974,7 @@ 
def plot_percent_dalys_averted_by_age_group( "Fewer Nurses / Improved Healthsystem Function" ] - # --------------------------------------------------------- - # Order age groups - # --------------------------------------------------------- + # Ordering age groups age_order = [ "0-4", "5-9", @@ -1583,12 +995,7 @@ def plot_percent_dalys_averted_by_age_group( "80+", ] - for df in [ - default_more, - default_fewer, - improved_more, - improved_fewer, - ]: + for df in [default_more, default_fewer, improved_more, improved_fewer,]: df = df.reindex(age_order) default_more = default_more.reindex(age_order) @@ -1604,14 +1011,8 @@ def plot_percent_dalys_averted_by_age_group( improved_more = improved_more.iloc[::-1] improved_fewer = improved_fewer.iloc[::-1] - # --------------------------------------------------------- - # Create subplots - # --------------------------------------------------------- - fig, axes = plt.subplots( - ncols=2, - figsize=(14, 8), - sharey=True, - ) + # Plot + fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=True,) panel_data = [ ( @@ -1632,22 +1033,10 @@ def plot_percent_dalys_averted_by_age_group( y = np.arange(len(more)) # More nurses - ax.barh( - y - 0.2, - more["mean"], - height=0.35, - color="steelblue", - label="More Nurses", - ) + ax.barh(y - 0.2, more["mean"], height=0.35, color="steelblue", label="More Nurses",) # Fewer nurses - ax.barh( - y + 0.2, - fewer["mean"], - height=0.35, - color="indianred", - label="Fewer Nurses", - ) + ax.barh(y + 0.2, fewer["mean"], height=0.35, color="indianred", label="Fewer Nurses",) # CI for More Nurses ax.errorbar( @@ -1676,13 +1065,10 @@ def plot_percent_dalys_averted_by_age_group( ) ax.axvline(0, color="black") - ax.set_yticks(y) ax.set_yticklabels(more.index) - ax.set_xlabel("% DALYs averted") ax.set_title(title) - ax.grid(axis="x", alpha=0.3) fig.suptitle( @@ -1692,30 +1078,30 @@ def plot_percent_dalys_averted_by_age_group( # Add legend handles, labels = axes[0].get_legend_handles_labels() - 
fig.legend( - handles, - labels, - loc="lower center", - ncol=2, - frameon=False, - ) - + fig.legend(handles, labels, loc="lower center", ncol=2, frameon=False,) fig.tight_layout() - return fig, axes -# ----------------------------------------------------------------------------- # Plot % deaths averted by age group -# ----------------------------------------------------------------------------- -def plot_percent_deaths_averted_by_age_group( - default_df, - improved_df, -): +def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): + + default_more = default_df[ + "More Nurses / Default Healthsystem Function" + ] + + default_fewer = default_df[ + "Fewer Nurses / Default Healthsystem Function" + ] + + improved_more = improved_df[ + "More Nurses / Improved Healthsystem Function" + ] + + improved_fewer = improved_df[ + "Fewer Nurses / Improved Healthsystem Function" + ] - # --------------------------------------------------------- - # Order age groups properly - # --------------------------------------------------------- age_order = [ "0-4", "5-9", @@ -1736,81 +1122,87 @@ def plot_percent_deaths_averted_by_age_group( "80+", ] - default_df = ( - default_df - .reindex(age_order) - .dropna(how="all") - ) + default_more = default_more.reindex(age_order) + default_fewer = default_fewer.reindex(age_order) - improved_df = ( - improved_df - .reindex(age_order) - .dropna(how="all") - ) + improved_more = improved_more.reindex(age_order) + improved_fewer = improved_fewer.reindex(age_order) - # Reverse for plotting - default_df = default_df.iloc[::-1] - improved_df = improved_df.iloc[::-1] + # Reverse so oldest age groups appear at top + default_more = default_more.iloc[::-1] + default_fewer = default_fewer.iloc[::-1] - # --------------------------------------------------------- - # Create subplots - # --------------------------------------------------------- - fig, axes = plt.subplots( - ncols=2, - figsize=(14, 8), - sharey=False, - ) + improved_more = 
improved_more.iloc[::-1] + improved_fewer = improved_fewer.iloc[::-1] + + fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=True,) panel_data = [ - (axes[0], default_df, "Default Healthsystem"), - (axes[1], improved_df, "Improved Healthsystem"), + ( + axes[0], + default_more, + default_fewer, + "Default Healthsystem", + ), + ( + axes[1], + improved_more, + improved_fewer, + "Improved Healthsystem", + ), ] - for ax, df, title in panel_data: + for ax, more, fewer, title in panel_data: + y = np.arange(len(more)) - y = np.arange(len(df)) + ax.barh(y - 0.2, more["mean"], height=0.35, color="steelblue", label="More nurses",) + ax.barh(y + 0.2, fewer["mean"], height=0.35, color="indianred", label="Fewer nurses",) - # More nurses - ax.barh( - y, - df["More nurses"], - color="lightcoral", - label="More nurses", + # More nurses CI + ax.errorbar( + more["mean"], + y - 0.2, + xerr=[ + more["mean"] - more["lower"], + more["upper"] - more["mean"], + ], + fmt="none", + capsize=4, + color="black", ) - # Fewer nurses - ax.barh( - y, - df["Fewer nurses"], - color="lightcoral", - label="Fewer nurses", + # Fewer nurses CI + ax.errorbar( + fewer["mean"], + y + 0.2, + xerr=[ + fewer["mean"] - fewer["lower"], + fewer["upper"] - fewer["mean"], + ], + fmt="none", + capsize=4, + color="black", ) - # Zero line ax.axvline(0, color="black", linewidth=1) - # Labels ax.set_yticks(y) - ax.set_yticklabels(df.index) + ax.set_yticklabels(more.index) ax.set_xlabel("% deaths averted") ax.set_title(title) ax.grid(axis="x", alpha=0.3) + handles, labels = axes[0].get_legend_handles_labels() + fig.legend(handles, labels, loc="lower center", ncol=2, bbox_to_anchor=(0.5, -0.02),) fig.suptitle( "% deaths averted by age group on national level\n(2027–2034)" ) - fig.tight_layout() - return fig, axes -# ----------------------------------------------------------------------------- -# Main -# ----------------------------------------------------------------------------- if __name__ == "__main__": - parser 
= argparse.ArgumentParser( - "Analyse DALYs across nurse staffing scenarios" + "Analyse DALYs/Deaths across nurse staffing scenarios" ) parser.add_argument( "--scenario-outputs-folder", @@ -1836,24 +1228,10 @@ def plot_percent_deaths_averted_by_age_group( # Optional: load logs log = load_pickled_dataframes(results_folder) - # ADD THIS LINE - Debug to find facility data - # facility_key = find_facility_level_data(results_folder) - # print(f"\n✓ Found facility-level data in key: {facility_key}") - # print("=" * 60 + "\n") - - # daly_cols = check_all_dalys_columns(results_folder) - # print(f"\n✓ Found DALY columns: {daly_cols}") - # check_death_columns(results_folder) - - # inspect_population_log(results_folder) - - # Get scenario names from scenario class + # Getting scenario names from scenario class param_names = tuple(StaffingScenario()._scenarios.keys()) - print("\nPARAM NAMES:") - print(param_names) - - # Scenarios to keep (Default Healthsystem Function only) + # Scnarios to keep (Default Healthsystem Function only) default_hs_scenarios = [ "Baseline Nurses / Default Healthsystem Function", "Fewer Nurses / Default Healthsystem Function", @@ -1868,13 +1246,9 @@ def plot_percent_deaths_averted_by_age_group( "More Nurses / Improved Healthsystem Function", ] - baseline_improved_scenario = ( - "Baseline Nurses / Improved Healthsystem Function" - ) + baseline_improved_scenario = ("Baseline Nurses / Improved Healthsystem Function") - # ------------------------------------------------------------------------- # Extract annual DALYs - # ------------------------------------------------------------------------- annual_dalys = extract_annual_dalys(results_folder).pipe( set_param_names_as_column_index_level_0, param_names=param_names, @@ -1900,92 +1274,39 @@ def plot_percent_deaths_averted_by_age_group( ), ] - print("\nALL DALY SCENARIOS:") - print( - summarized_annual_dalys.columns.get_level_values(0).unique().tolist() - ) - - print("\nFILTERED IMPROVED DALY SCENARIOS:") - 
print( - summarized_annual_dalys_improved.columns.get_level_values(0).unique().tolist() - ) - - # ------------------------------------------------------------------------- # Plot 1: Annual DALYs over time - # ------------------------------------------------------------------------- fig_1, ax_1 = plot_annual_dalys(summarized_annual_dalys_default) - # ------------------------------------------------------------------------- # Plot 2: Percent DALYs averted relative to baseline (2027–2034) - # ------------------------------------------------------------------------- - percent_averted = calculate_percent_dalys_averted( - summarized_annual_dalys_default, + percent_dalys_averted = calculate_percent_dalys_averted( + annual_dalys.loc[ + :, + annual_dalys.columns.get_level_values(0).isin(default_hs_scenarios) + ], baseline_scenario=baseline_scenario, - comparison_years=range(2027, 2035), # 2027 to 2034 inclusive - ) - - fig_2, ax_2 = plot_percent_dalys_averted(percent_averted) - - # Sensitivity analysis: DALYs under Improved Healthsystem Function - fig_5, ax_5 = plot_annual_dalys( - summarized_annual_dalys_improved + comparison_years=range(2027, 2035), ) - percent_averted_improved = calculate_percent_dalys_averted( - summarized_annual_dalys_improved, + percent_dalys_averted_improved = calculate_percent_dalys_averted( + annual_dalys.loc[ + :, + annual_dalys.columns.get_level_values(0).isin(improved_hs_scenarios) + ], baseline_scenario=baseline_improved_scenario, comparison_years=range(2027, 2035), ) - print("\nPERCENT DALYS AVERTED IMPROVED:") - print(percent_averted_improved) - - fig_6, ax_6 = plot_percent_dalys_averted( - percent_averted_improved + fig_2, ax_2 = plot_percent_dalys_averted_comparison( + percent_dalys_averted, + percent_dalys_averted_improved, ) - # ------------------------------------------------------------------------- - # Plot 3: Percent DALYs averted by district (2027–2034) - # ------------------------------------------------------------------------- - # 
annual_dalys_by_district = extract_annual_dalys_by_district( - # results_folder - # ).pipe( - # set_param_names_as_column_index_level_0, - # param_names=param_names, - # ) - - # Summarize across runs - # summarized_annual_dalys_by_district = summarize( - # annual_dalys_by_district - # ) - # - # # Filter to Default Healthsystem Function scenarios only - # summarized_annual_dalys_by_district = ( - # summarized_annual_dalys_by_district.loc[ - # :, - # summarized_annual_dalys_by_district.columns - # .get_level_values(0) - # .isin(default_hs_scenarios) - # ] - # ) - - # Calculate district-level % DALYs averted - # percent_averted_by_district = ( - # calculate_percent_dalys_averted_by_district( - # summarized_annual_dalys_by_district, - # baseline_scenario=baseline_scenario, - # comparison_years=range(2027, 2035), # 2027 to 2034 inclusive - # ) - # ) - # - # # Create district-level plot - # fig_3, ax_3 = plot_percent_dalys_averted_by_district( - # percent_averted_by_district - # ) + # Sensitivity analysis: DALYs under Improved Healthsystem Function + fig_5, ax_5 = plot_annual_dalys( + summarized_annual_dalys_improved + ) - # ------------------------------------------------------------------------- # Extract annual deaths - # ------------------------------------------------------------------------- annual_deaths = extract_annual_deaths(results_folder).pipe( set_param_names_as_column_index_level_0, param_names=param_names, @@ -2010,79 +1331,73 @@ def plot_percent_deaths_averted_by_age_group( ] # Plot annual deaths - # ------------------------------------------------------------------------- fig_3, ax_3 = plot_annual_deaths( summarized_annual_deaths_default ) - # ------------------------------------------------------------------------- # Plot % deaths averted - # ------------------------------------------------------------------------- percent_deaths_averted = calculate_percent_deaths_averted( - summarized_annual_deaths_default, + annual_deaths.loc[ + :, + 
annual_deaths.columns.get_level_values(0).isin(default_hs_scenarios) + ], baseline_scenario=baseline_scenario, comparison_years=range(2027, 2035), ) - fig_4, ax_4 = plot_percent_deaths_averted( - percent_deaths_averted - ) - - # Sensitivity analysis: deaths under Improved Healthsystem Function - fig_7, ax_7 = plot_annual_deaths( - summarized_annual_deaths_improved - ) - percent_deaths_averted_improved = calculate_percent_deaths_averted( - summarized_annual_deaths_improved, + annual_deaths.loc[ + :, + annual_deaths.columns.get_level_values(0).isin(improved_hs_scenarios) + ], baseline_scenario=baseline_improved_scenario, comparison_years=range(2027, 2035), ) - print("\nPERCENT DEATHS AVERTED IMPROVED:") - print(percent_deaths_averted_improved) + fig_4, ax_4 = plot_percent_deaths_averted_comparison( + percent_deaths_averted, + percent_deaths_averted_improved, + ) - fig_8, ax_8 = plot_percent_deaths_averted( - percent_deaths_averted_improved + # Sensitivity analysis: deaths under Improved Healthsystem Function + fig_7, ax_7 = plot_annual_deaths( + summarized_annual_deaths_improved ) # Extract deaths by cause - # ------------------------------------------------------------------------- deaths_by_cause = extract_deaths_by_cause(results_folder).pipe( set_param_names_as_column_index_level_0, param_names=param_names, ) - summarized_deaths_by_cause = summarize(deaths_by_cause) - - summarized_deaths_by_cause_default = ( - summarized_deaths_by_cause.loc[ - :, - summarized_deaths_by_cause.columns - .get_level_values(0) - .isin(default_hs_scenarios) + deaths_by_cause_default = ( + deaths_by_cause.loc[ + :, + deaths_by_cause.columns + .get_level_values(0) + .isin(default_hs_scenarios) ] ) percent_deaths_by_cause_default = ( calculate_percent_deaths_averted_by_cause( - summarized_deaths_by_cause_default, + deaths_by_cause_default, baseline_scenario=baseline_scenario, ) ) - summarized_deaths_by_cause_improved = ( - summarized_deaths_by_cause.loc[ - :, - 
summarized_deaths_by_cause.columns - .get_level_values(0) - .isin(improved_hs_scenarios) + deaths_by_cause_improved = ( + deaths_by_cause.loc[ + :, + deaths_by_cause.columns + .get_level_values(0) + .isin(improved_hs_scenarios) ] ) percent_deaths_by_cause_improved = ( calculate_percent_deaths_averted_by_cause( - summarized_deaths_by_cause_improved, + deaths_by_cause_improved, baseline_scenario=baseline_improved_scenario, ) ) @@ -2093,9 +1408,7 @@ def plot_percent_deaths_averted_by_age_group( top_n=10, ) - # ------------------------------------------------------------------------- # Extract deaths by age group - # ------------------------------------------------------------------------- deaths_by_age_group = extract_deaths_by_age_group( results_folder ).pipe( @@ -2103,15 +1416,10 @@ def plot_percent_deaths_averted_by_age_group( param_names=param_names, ) - summarized_deaths_by_age_group = summarize( - deaths_by_age_group - ) - - # Deaths by cause Default - summarized_deaths_by_age_group_default = ( - summarized_deaths_by_age_group.loc[ + deaths_by_age_group_default = ( + deaths_by_age_group.loc[ :, - summarized_deaths_by_age_group.columns + deaths_by_age_group.columns .get_level_values(0) .isin(default_hs_scenarios) ] @@ -2119,16 +1427,15 @@ def plot_percent_deaths_averted_by_age_group( percent_deaths_by_age_default = ( calculate_percent_deaths_averted_by_age_group( - summarized_deaths_by_age_group_default, + deaths_by_age_group_default, baseline_scenario=baseline_scenario, ) ) - # Deaths by cause Improved - summarized_deaths_by_age_group_improved = ( - summarized_deaths_by_age_group.loc[ + deaths_by_age_group_improved = ( + deaths_by_age_group.loc[ :, - summarized_deaths_by_age_group.columns + deaths_by_age_group.columns .get_level_values(0) .isin(improved_hs_scenarios) ] @@ -2136,7 +1443,7 @@ def plot_percent_deaths_averted_by_age_group( percent_deaths_by_age_improved = ( calculate_percent_deaths_averted_by_age_group( - summarized_deaths_by_age_group_improved, + 
deaths_by_age_group_improved, baseline_scenario=baseline_improved_scenario, ) ) @@ -2147,44 +1454,41 @@ def plot_percent_deaths_averted_by_age_group( ) # Extract DALYs by cause - # ------------------------------------------------------------------------- dalys_by_cause = extract_dalys_by_cause(results_folder).pipe( set_param_names_as_column_index_level_0, param_names=param_names, ) - summarized_dalys_by_cause = summarize(dalys_by_cause) - - # DALYs by cause Default - summarized_dalys_by_cause_default = ( - summarized_dalys_by_cause.loc[ - :, - summarized_dalys_by_cause.columns - .get_level_values(0) - .isin(default_hs_scenarios) + # Default Healthsystem + dalys_by_cause_default = ( + dalys_by_cause.loc[ + :, + dalys_by_cause.columns + .get_level_values(0) + .isin(default_hs_scenarios) ] ) percent_by_cause_default = ( calculate_percent_dalys_averted_by_cause( - summarized_dalys_by_cause_default, + dalys_by_cause_default, baseline_scenario=baseline_scenario, ) ) - # DALYs by cause Improved - summarized_dalys_by_cause_improved = ( - summarized_dalys_by_cause.loc[ - :, - summarized_dalys_by_cause.columns - .get_level_values(0) - .isin(improved_hs_scenarios) + # Improved Healthsystem + dalys_by_cause_improved = ( + dalys_by_cause.loc[ + :, + dalys_by_cause.columns + .get_level_values(0) + .isin(improved_hs_scenarios) ] ) percent_by_cause_improved = ( calculate_percent_dalys_averted_by_cause( - summarized_dalys_by_cause_improved, + dalys_by_cause_improved, baseline_scenario=baseline_improved_scenario, ) ) @@ -2196,7 +1500,6 @@ def plot_percent_deaths_averted_by_age_group( ) # Extract DALYs by age group - # ------------------------------------------------------------------------- dalys_by_age_group = extract_dalys_by_age_group( results_folder ).pipe( @@ -2204,68 +1507,48 @@ def plot_percent_deaths_averted_by_age_group( param_names=param_names, ) - summarized_dalys_by_age_group = summarize( - dalys_by_age_group - ) - - # DALYs by age group Default - 
summarized_dalys_by_age_group_default = ( - summarized_dalys_by_age_group.loc[ - :, - summarized_dalys_by_age_group.columns - .get_level_values(0) - .isin(default_hs_scenarios) + dalys_by_age_group_default = ( + dalys_by_age_group.loc[ + :, + dalys_by_age_group.columns + .get_level_values(0) + .isin(default_hs_scenarios) ] ) percent_dalys_by_age_default = ( calculate_percent_dalys_averted_by_age_group( - summarized_dalys_by_age_group_default, + dalys_by_age_group_default, baseline_scenario=baseline_scenario, ) ) - # DALYs by age group Improved - summarized_dalys_by_age_group_improved = ( - summarized_dalys_by_age_group.loc[ - :, - summarized_dalys_by_age_group.columns - .get_level_values(0) - .isin(improved_hs_scenarios) + dalys_by_age_group_improved = ( + dalys_by_age_group.loc[ + :, + dalys_by_age_group.columns + .get_level_values(0) + .isin(improved_hs_scenarios) ] ) percent_dalys_by_age_improved = ( calculate_percent_dalys_averted_by_age_group( - summarized_dalys_by_age_group_improved, + dalys_by_age_group_improved, baseline_scenario=baseline_improved_scenario, ) ) - print("\nDEFAULT AGE GROUP OBJECT:") - print(type(percent_dalys_by_age_default)) - print(percent_dalys_by_age_default.keys()) - - print("\nIMPROVED AGE GROUP OBJECT:") - print(type(percent_dalys_by_age_improved)) - print(percent_dalys_by_age_improved.keys()) - fig_11, ax_11 = plot_percent_dalys_averted_by_age_group( percent_dalys_by_age_default, percent_dalys_by_age_improved, ) - - - # ------------------------------------------------------------------------- - # Show figures - # ------------------------------------------------------------------------- + # Showing figures if args.show_figures: plt.show() - # ------------------------------------------------------------------------- - # Save figures - # ------------------------------------------------------------------------- + # Saving figures if args.save_figures: fig_1.savefig( results_folder / "annual_dalys_across_scenarios.pdf", @@ -2273,22 
+1556,17 @@ def plot_percent_deaths_averted_by_age_group( ) fig_2.savefig( - results_folder / "percent_dalys_averted_vs_baseline_2027_2034.pdf", + results_folder / "percent_dalys_averted_vs_baseline_2027_2034_comparison.pdf", bbox_inches="tight", ) - # fig_3.savefig( - # results_folder / "percent_dalys_averted_by_district_2027_2034.pdf", - # bbox_inches="tight", - # ) - fig_3.savefig( results_folder / "annual_deaths_across_scenarios.pdf", bbox_inches="tight", ) fig_4.savefig( - results_folder / "percent_deaths_averted_vs_baseline_2027_2034.pdf", + results_folder / "percent_deaths_averted_vs_baseline_2027_2034_comparison.pdf", bbox_inches="tight", ) @@ -2299,11 +1577,11 @@ def plot_percent_deaths_averted_by_age_group( bbox_inches="tight", ) - fig_6.savefig( - results_folder / - "percent_dalys_averted_vs_baseline_2027_2034_improved_healthsystem.pdf", - bbox_inches="tight", - ) + # fig_6.savefig( + # results_folder / + # "percent_dalys_averted_vs_baseline_2027_2034_improved_healthsystem.pdf", + # bbox_inches="tight", + # ) # Sensitivity-analysis death figures fig_7.savefig( @@ -2312,11 +1590,11 @@ def plot_percent_deaths_averted_by_age_group( bbox_inches="tight", ) - fig_8.savefig( - results_folder / - "percent_deaths_averted_vs_baseline_2027_2034_improved_healthsystem.pdf", - bbox_inches="tight", - ) + # fig_8.savefig( + # results_folder / + # "percent_deaths_averted_vs_baseline_2027_2034_improved_healthsystem.pdf", + # bbox_inches="tight", + # ) fig_9.savefig( results_folder / From 44b2d9635330947dda76ff4444d2c8ef5ed26f72 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 11 Jun 2026 17:10:26 +0100 Subject: [PATCH 25/52] fix the % dalys/deaths averted calculation --- .../analysis_nurses_scenario_dalys.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py index b1e7b1154e..8aa4572e71 100644 --- 
a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py @@ -357,19 +357,15 @@ def calculate_percent_dalys_averted( year_mask = np.isin(years, list(comparison_years)) annual_dalys = annual_dalys.loc[year_mask] + annual_dalys_agg = annual_dalys.sum(axis=0) - pct_diff = ( + pct_diff = pd.DataFrame( -100.0 - * find_difference_relative_to_comparison_series_dataframe( - annual_dalys, + * find_difference_relative_to_comparison_series( + annual_dalys_agg, comparison=baseline_scenario, scaled=True, ) - ) - - # sum across years but keep DataFrame structure - pct_diff = pd.DataFrame( - pct_diff.sum(axis=0) ).T summarized = summarize(pct_diff) @@ -401,20 +397,17 @@ def calculate_percent_deaths_averted( """ years = annual_deaths.index.astype(int) year_mask = np.isin(years, list(comparison_years)) + annual_deaths = annual_deaths.loc[year_mask] + annual_deaths_agg = annual_deaths.sum(axis=0) - pct_diff = ( + pct_diff = pd.DataFrame( -100.0 - * find_difference_relative_to_comparison_series_dataframe( - annual_deaths, + * find_difference_relative_to_comparison_series( + annual_deaths_agg, comparison=baseline_scenario, scaled=True, ) - ) - - # sum across years but keep DataFrame structure - pct_diff = pd.DataFrame( - pct_diff.sum(axis=0) ).T summarized = summarize(pct_diff) From 40a00c0e2c9b8521a0eb90a0483e959d760ba6ae Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 11 Jun 2026 20:46:21 +0100 Subject: [PATCH 26/52] check that total dalys/deaths are equal to sum of dalys/deaths by subgroup; plot all causes --- .../analysis_nurses_scenario_dalys.py | 82 +++++++++---------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py index 8aa4572e71..db1650e8fb 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py +++ 
b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py @@ -429,38 +429,6 @@ def calculate_percent_deaths_averted( return pd.DataFrame(results).T -# Calculate % DALYs averted by cause -def calculate_percent_dalys_averted_by_cause( - summarized_dalys_by_cause, - baseline_scenario, -): - """ - Returns DataFrame indexed by cause with columns: - more_nurses - fewer_nurses - """ - - scenario_names = (summarized_dalys_by_cause.columns.get_level_values(0).unique()) - baseline = summarized_dalys_by_cause[(baseline_scenario, "mean")] - results = pd.DataFrame(index=baseline.index) - - for scenario in scenario_names: - if scenario == baseline_scenario: - continue - - scenario_values = summarized_dalys_by_cause[(scenario, "mean")] - percent_averted = ((baseline - scenario_values) / baseline * 100.0) - - if "More Nurses" in scenario: - results["More nurses"] = percent_averted - - elif "Fewer Nurses" in scenario: - # Make negative for mirrored plotting - results["Fewer nurses"] = -percent_averted - - return results - - # Calculate % deaths averted by cause def calculate_percent_deaths_averted_by_cause( deaths_by_cause, @@ -677,7 +645,7 @@ def plot_percent_deaths_averted_comparison(default_df,improved_df,): # Plot % DALYs averted by cause -def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): +def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=30): # Extracting scenario dataframes default_more = default_df[ @@ -758,7 +726,7 @@ def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): ) # Plot - fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=False,) + fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=False,) panel_data = [ ( @@ -790,8 +758,9 @@ def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): more["upper"] - more["mean"], ], fmt="none", - capsize=4, + capsize=2, color="black", + alpha=0.5, ) # CI bars: Fewer nurses @@ -803,8 +772,9 @@ def 
plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): fewer["upper"] - fewer["mean"], ], fmt="none", - capsize=4, + capsize=2, color="black", + alpha=0.5, ) ax.axvline(0, color="black", linewidth=1) @@ -826,7 +796,7 @@ def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=10,): # Plot % deaths averted by cause -def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=10,): +def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=30): # Extracting scenario dataframes default_more = default_df[ @@ -884,7 +854,7 @@ def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=10,): ) # Plot - fig, axes = plt.subplots(ncols=2, figsize=(14, 8), sharey=False,) + fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=False,) panel_data = [ ( @@ -915,8 +885,9 @@ def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=10,): more["upper"] - more["mean"], ], fmt="none", - capsize=4, + capsize=2, color="black", + alpha=0.5, ) ax.errorbar( @@ -927,8 +898,9 @@ def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=10,): fewer["upper"] - fewer["mean"], ], fmt="none", - capsize=4, + capsize=2, color="black", + alpha=0.5 ) ax.axvline(0, color="black", linewidth=1) @@ -1363,6 +1335,14 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): param_names=param_names, ) + # check that total deaths equal to sum of deaths by cause + total_deaths = annual_deaths.loc[ + (annual_deaths.index >= 2027) & (annual_deaths.index <= 2034) + ].sum(axis=0) + total_deaths_cause = deaths_by_cause.sum(axis=0) + assert (total_deaths.index == total_deaths_cause.index).all() + assert (abs(total_deaths.values - total_deaths_cause.values) < 1e-7).all() + deaths_by_cause_default = ( deaths_by_cause.loc[ :, @@ -1398,7 +1378,7 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): fig_10, ax_10 = plot_percent_deaths_averted_by_cause( 
percent_deaths_by_cause_default, percent_deaths_by_cause_improved, - top_n=10, + top_n=30, ) # Extract deaths by age group @@ -1409,6 +1389,11 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): param_names=param_names, ) + # check that total deaths equal to sum of deaths by age group + total_deaths_age = deaths_by_age_group.sum(axis=0) + assert (total_deaths.index == total_deaths_age.index).all() + assert (abs(total_deaths.values - total_deaths_age.values) < 1e-7).all() + deaths_by_age_group_default = ( deaths_by_age_group.loc[ :, @@ -1452,6 +1437,14 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): param_names=param_names, ) + # check that total dalys equal to sum of dalys by cause + total_dalys = annual_dalys.loc[ + (annual_dalys.index >= 2027) & (annual_dalys.index <= 2034) + ].sum(axis=0) + total_dalys_cause = dalys_by_cause.sum(axis=0) + assert (total_dalys.index == total_dalys_cause.index).all() + assert (abs(total_dalys.values - total_dalys_cause.values) < 1e-7).all() + # Default Healthsystem dalys_by_cause_default = ( dalys_by_cause.loc[ @@ -1489,7 +1482,7 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): fig_9, ax_9 = plot_percent_dalys_averted_by_cause( percent_by_cause_default, percent_by_cause_improved, - top_n=10, + top_n=30, ) # Extract DALYs by age group @@ -1500,6 +1493,11 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): param_names=param_names, ) + # check that total dalys equal to sum of dalys by age groups + total_dalys_age = dalys_by_age_group.sum(axis=0) + assert (total_dalys.index == total_dalys_age.index).all() + assert (abs(total_dalys.values - total_dalys_age.values) < 1e-7).all() + dalys_by_age_group_default = ( dalys_by_age_group.loc[ :, From 25416cf1d7ac6957d66d54f04c5b0f8b48a183be Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 11 Jun 2026 21:50:17 +0100 Subject: [PATCH 27/52] reorder the cause in dalys/deaths plots --- 
.../analysis_nurses_scenario_dalys.py | 90 ++++++++++++------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py index db1650e8fb..48ca6c3b3b 100644 --- a/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py +++ b/src/scripts/nurses_analyses/analysis_nurses_scenario_dalys.py @@ -696,15 +696,17 @@ def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=30): .index ) - default_more = ( - default_more.loc[default_top] - .sort_values("mean", ascending=True) - ) + # default_more = ( + # default_more.loc[default_top] + # .sort_values("mean", ascending=True) + # ) + default_more = default_more.reindex(cause_order) - default_fewer = ( - default_fewer.loc[default_top] - .reindex(default_more.index) - ) + # default_fewer = ( + # default_fewer.loc[default_top] + # .reindex(default_more.index) + # ) + default_fewer = default_fewer.reindex(cause_order) # Top causes for IMPROVED healthsystem improved_top = ( @@ -715,18 +717,20 @@ def plot_percent_dalys_averted_by_cause(default_df, improved_df, top_n=30): .index ) - improved_more = ( - improved_more.loc[improved_top] - .sort_values("mean", ascending=True) - ) + # improved_more = ( + # improved_more.loc[improved_top] + # .sort_values("mean", ascending=True) + # ) + improved_more = improved_more.reindex(cause_order) - improved_fewer = ( - improved_fewer.loc[improved_top] - .reindex(improved_more.index) - ) + # improved_fewer = ( + # improved_fewer.loc[improved_top] + # .reindex(improved_more.index) + # ) + improved_fewer = improved_fewer.reindex(cause_order) # Plot - fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=False,) + fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=True) panel_data = [ ( @@ -824,15 +828,17 @@ def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=30): .index ) - default_more = ( - default_more.loc[default_top] 
- .sort_values("mean", ascending=True) - ) + # default_more = ( + # default_more.loc[default_top] + # .sort_values("mean", ascending=True) + # ) + default_more = default_more.reindex(death_order) - default_fewer = ( - default_fewer.loc[default_top] - .reindex(default_more.index) - ) + # default_fewer = ( + # default_fewer.loc[default_top] + # .reindex(default_more.index) + # ) + default_fewer = default_fewer.reindex(death_order) # Top causes for IMPROVED healthsystem improved_top = ( @@ -843,18 +849,20 @@ def plot_percent_deaths_averted_by_cause(default_df, improved_df, top_n=30): .index ) - improved_more = ( - improved_more.loc[improved_top] - .sort_values("mean", ascending=True) - ) + # improved_more = ( + # improved_more.loc[improved_top] + # .sort_values("mean", ascending=True) + # ) + improved_more = improved_more.reindex(death_order) - improved_fewer = ( - improved_fewer.loc[improved_top] - .reindex(improved_more.index) - ) + # improved_fewer = ( + # improved_fewer.loc[improved_top] + # .reindex(improved_more.index) + # ) + improved_fewer = improved_fewer.reindex(death_order) # Plot - fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=False,) + fig, axes = plt.subplots(ncols=2, figsize=(14, 10), sharey=True) panel_data = [ ( @@ -1343,6 +1351,13 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): assert (total_deaths.index == total_deaths_cause.index).all() assert (abs(total_deaths.values - total_deaths_cause.values) < 1e-7).all() + # find the descending order of causes in terms of total deaths in baseline scenario + mean_deaths_by_cause = deaths_by_cause.groupby(axis=1, level="draw").mean().sort_values( + by="Baseline Nurses / Default Healthsystem Function", + ascending=True, + ) + death_order = mean_deaths_by_cause.index.tolist() + deaths_by_cause_default = ( deaths_by_cause.loc[ :, @@ -1445,6 +1460,13 @@ def plot_percent_deaths_averted_by_age_group(default_df,improved_df,): assert (total_dalys.index == 
total_dalys_cause.index).all() assert (abs(total_dalys.values - total_dalys_cause.values) < 1e-7).all() + # find the descending order of causes in terms of total dalys in baseline scenario + mean_dalys_by_cause = dalys_by_cause.groupby(axis=1, level="draw").mean().sort_values( + by="Baseline Nurses / Default Healthsystem Function", + ascending=True, + ) + cause_order = mean_dalys_by_cause.index.tolist() + # Default Healthsystem dalys_by_cause_default = ( dalys_by_cause.loc[ From d5a65ec58f02bd16df7783288d72153b3314e3ec Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 10:49:29 +0100 Subject: [PATCH 28/52] update existing resource file names for clarity, as more scenarios to be added --- .../custom_worse.csv | 10 ---------- .../improved_staffing.csv | 10 ---------- .../nurses_analyses/nurses_scenario_analyses.py | 8 ++++---- 3 files changed, 4 insertions(+), 24 deletions(-) delete mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv delete mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv deleted file mode 100644 index be1dc97f7f..0000000000 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/custom_worse.csv +++ /dev/null @@ -1,10 +0,0 @@ -Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor -Clinical,1,1,1,1,1,1,1 -DCSA,1,1,1,1,1,1,1 -Dental,1,1,1,1,1,1,1 -Laboratory,1,1,1,1,1,1,1 -Mental,1,1,1,1,1,1,1 -Nursing_and_Midwifery,0.85,0.85,0.85,0.85,0.85,0.85,0.85 -Nutrition,1,1,1,1,1,1,1 -Pharmacy,1,1,1,1,1,1,1 
-Radiography,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv deleted file mode 100644 index 953c74107f..0000000000 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/improved_staffing.csv +++ /dev/null @@ -1,10 +0,0 @@ -Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor -Clinical,1,1,1,1,1,1,1 -DCSA,1,1,1,1,1,1,1 -Dental,1,1,1,1,1,1,1 -Laboratory,1,1,1,1,1,1,1 -Mental,1,1,1,1,1,1,1 -Nursing_and_Midwifery,1.455,1.455,1.455,1.455,1.455,1.455,1.455 -Nutrition,1,1,1,1,1,1,1 -Pharmacy,1,1,1,1,1,1,1 -Radiography,1,1,1,1,1,1,1 diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 5202c29c8f..4f0343514b 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -109,7 +109,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios, { "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", "year_HR_scaling_by_level_and_officer_type": 2027, }, }, @@ -120,7 +120,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_scenarios, { "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", "year_HR_scaling_by_level_and_officer_type": 2027, }, }, @@ -142,7 +142,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_max_healthsystem_scenarios, { "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "custom_worse", + 
'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", "year_HR_scaling_by_level_and_officer_type": 2027, }, }, @@ -153,7 +153,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._default_of_all_max_healthsystem_scenarios, { "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "improved_staffing", + 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", "year_HR_scaling_by_level_and_officer_type": 2027, }, }, From c9cad9bec3860211bc5da3fe80eccfa42d172e71 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 10:53:34 +0100 Subject: [PATCH 29/52] re-add the existing resource files after renaming --- .../establishment_staffing_N.csv | 10 ++++++++++ .../worse_staffing_N.csv | 10 ++++++++++ 2 files changed, 20 insertions(+) create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_N.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/worse_staffing_N.csv diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_N.csv new file mode 100644 index 0000000000..64ca01af65 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_N.csv @@ -0,0 +1,10 @@ +Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor +Clinical,1,1,1,1,1,1,1 +DCSA,1,1,1,1,1,1,1 +Dental,1,1,1,1,1,1,1 +Laboratory,1,1,1,1,1,1,1 +Mental,1,1,1,1,1,1,1 +Nursing_and_Midwifery,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535 +Nutrition,1,1,1,1,1,1,1 +Pharmacy,1,1,1,1,1,1,1 +Radiography,1,1,1,1,1,1,1 diff 
--git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/worse_staffing_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/worse_staffing_N.csv new file mode 100644 index 0000000000..be1dc97f7f --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/worse_staffing_N.csv @@ -0,0 +1,10 @@ +Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor +Clinical,1,1,1,1,1,1,1 +DCSA,1,1,1,1,1,1,1 +Dental,1,1,1,1,1,1,1 +Laboratory,1,1,1,1,1,1,1 +Mental,1,1,1,1,1,1,1 +Nursing_and_Midwifery,0.85,0.85,0.85,0.85,0.85,0.85,0.85 +Nutrition,1,1,1,1,1,1,1 +Pharmacy,1,1,1,1,1,1,1 +Radiography,1,1,1,1,1,1,1 From ebb3cf0e7e910866317a7e2b79bd49592ad18ae7 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 10:54:25 +0100 Subject: [PATCH 30/52] add in resource files for additional scenarios --- .../default.csv | 38 +++++++++++++++++++ .../establishment_by_district_and_CNP.csv | 38 +++++++++++++++++++ .../establishment_by_district_and_N.csv | 38 +++++++++++++++++++ .../establishment_staffing_CNP.csv | 10 +++++ 4 files changed, 124 insertions(+) create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_CNP.csv diff --git 
a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv new file mode 100644 index 0000000000..9cdbd1be66 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv @@ -0,0 +1,38 @@ +District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography +Balaka,1,1,1,1,1,1,1,1,1 +Blantyre,1,1,1,1,1,1,1,1,1 +Blantyre City,1,1,1,1,1,1,1,1,1 +Central Hospitals (Central),1,1,1,1,1,1,1,1,1 +Central Hospitals (Northern),1,1,1,1,1,1,1,1,1 +Central Hospitals (Southern),1,1,1,1,1,1,1,1,1 +Chikwawa,1,1,1,1,1,1,1,1,1 +Chiradzulu,1,1,1,1,1,1,1,1,1 +Chitipa,1,1,1,1,1,1,1,1,1 +Dedza,1,1,1,1,1,1,1,1,1 +Dowa,1,1,1,1,1,1,1,1,1 +Headquarter,1,1,1,1,1,1,1,1,1 +Karonga,1,1,1,1,1,1,1,1,1 +Kasungu,1,1,1,1,1,1,1,1,1 +Likoma,1,1,1,1,1,1,1,1,1 +Lilongwe,1,1,1,1,1,1,1,1,1 +Lilongwe City,1,1,1,1,1,1,1,1,1 +Machinga,1,1,1,1,1,1,1,1,1 +Mangochi,1,1,1,1,1,1,1,1,1 +Mchinji,1,1,1,1,1,1,1,1,1 +Mulanje,1,1,1,1,1,1,1,1,1 +Mwanza,1,1,1,1,1,1,1,1,1 +Mzimba,1,1,1,1,1,1,1,1,1 +Mzuzu City,1,1,1,1,1,1,1,1,1 +Neno,1,1,1,1,1,1,1,1,1 +Nkhata Bay,1,1,1,1,1,1,1,1,1 +Nkhotakota,1,1,1,1,1,1,1,1,1 +Nsanje,1,1,1,1,1,1,1,1,1 +Ntcheu,1,1,1,1,1,1,1,1,1 +Ntchisi,1,1,1,1,1,1,1,1,1 +Phalombe,1,1,1,1,1,1,1,1,1 +Rumphi,1,1,1,1,1,1,1,1,1 +Salima,1,1,1,1,1,1,1,1,1 +Thyolo,1,1,1,1,1,1,1,1,1 +Zomba,1,1,1,1,1,1,1,1,1 +Zomba City,1,1,1,1,1,1,1,1,1 +Zomba Mental Hospital,1,1,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv new file mode 100644 
index 0000000000..733889f0b5 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv @@ -0,0 +1,38 @@ +District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography +Balaka,1.174526999,1.169316612,2.052879982,1,1,1,1,1,1 +Blantyre,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 +Blantyre City,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 +Central Hospitals (Central),2.486954763,1.355424224,1.412864458,1,1,1,1,1,1 +Central Hospitals (Northern),2.197010075,1.070124208,0.897056799,1,1,1,1,1,1 +Central Hospitals (Southern),2.177651732,1.402012229,0.710875199,1,1,1,1,1,1 +Chikwawa,1.571206225,1.98442501,1.947217042,1,1,1,1,1,1 +Chiradzulu,1.508760236,1.900692263,2.173637628,1,1,1,1,1,1 +Chitipa,1.145249933,1.936513522,1.690607044,1,1,1,1,1,1 +Dedza,1.23965271,1.543188619,2.264205862,1,1,1,1,1,1 +Dowa,1.435421075,1.737431437,1.792297693,1,1,1,1,1,1 +Headquarter,1.183424931,3.62272938,1.449091752,1,1,1,1,1,1 +Karonga,1.335881459,1.872527654,3.043092679,1,1,1,1,1,1 +Kasungu,1.482635542,1.937984358,2.557220739,1,1,1,1,1,1 +Likoma,2.233502608,3.155265589,3.042577533,1,1,1,1,1,1 +Lilongwe,1.380087383,1.167518839,1.709475426,1,1,1,1,1,1 +Lilongwe City,1.380087383,1.167518839,1.709475426,1,1,1,1,1,1 +Machinga,1.584340315,1.853985036,1.96662452,1,1,1,1,1,1 +Mangochi,1.212312054,1.260791694,2.940803849,1,1,1,1,1,1 +Mchinji,1.319220321,1.722723439,1.716029706,1,1,1,1,1,1 +Mulanje,1.442124965,1.488502197,3.550274792,1,1,1,1,1,1 +Mwanza,2.032262823,1.646695173,1.368586655,1,1,1,1,1,1 +Mzimba,1.664191309,1.462212301,2.258878319,1,1,1,1,1,1 +Mzuzu City,1.664191309,1.462212302,2.258878319,1,1,1,1,1,1 +Neno,1.90022409,2.133827415,2.264205862,1,1,1,1,1,1 +Nkhata Bay,1.64711602,2.260579269,2.781346584,1,1,1,1,1,1 +Nkhotakota,1.798270487,1.964020582,2.225390905,1,1,1,1,1,1 +Nsanje,1.560560348,2.028728453,2.475531743,1,1,1,1,1,1 
+Ntcheu,1.436599582,1.544519745,2.753274328,1,1,1,1,1,1 +Ntchisi,1.511195684,1.885047321,2.041902014,1,1,1,1,1,1 +Phalombe,1.81136469,1.561895301,1.783497541,1,1,1,1,1,1 +Rumphi,1.680946432,1.643251104,1.494375869,1,1,1,1,1,1 +Salima,1.156345943,1.449091752,2.371241048,1,1,1,1,1,1 +Thyolo,1.283305831,1.360371849,2.083069393,1,1,1,1,1,1 +Zomba,1.12943916,1.10442287,1.332229514,1,1,1,1,1,1 +Zomba City,1.12943916,1.10442287,1.332229514,1,1,1,1,1,1 +Zomba Mental Hospital,1.521546339,0.85520169,2.173637628,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv new file mode 100644 index 0000000000..c3ad875668 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv @@ -0,0 +1,38 @@ +District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography +Balaka,1,1.169316612,1,1,1,1,1,1,1 +Blantyre,1,0.89730864,1,1,1,1,1,1,1 +Blantyre City,1,0.89730864,1,1,1,1,1,1,1 +Central Hospitals (Central),1,1.355424224,1,1,1,1,1,1,1 +Central Hospitals (Northern),1,1.070124208,1,1,1,1,1,1,1 +Central Hospitals (Southern),1,1.402012229,1,1,1,1,1,1,1 +Chikwawa,1,1.98442501,1,1,1,1,1,1,1 +Chiradzulu,1,1.900692263,1,1,1,1,1,1,1 +Chitipa,1,1.936513522,1,1,1,1,1,1,1 +Dedza,1,1.543188619,1,1,1,1,1,1,1 +Dowa,1,1.737431437,1,1,1,1,1,1,1 +Headquarter,1,3.62272938,1,1,1,1,1,1,1 +Karonga,1,1.872527654,1,1,1,1,1,1,1 +Kasungu,1,1.937984358,1,1,1,1,1,1,1 +Likoma,1,3.155265589,1,1,1,1,1,1,1 +Lilongwe,1,1.167518839,1,1,1,1,1,1,1 +Lilongwe City,1,1.167518839,1,1,1,1,1,1,1 +Machinga,1,1.853985036,1,1,1,1,1,1,1 +Mangochi,1,1.260791694,1,1,1,1,1,1,1 +Mchinji,1,1.722723439,1,1,1,1,1,1,1 
+Mulanje,1,1.488502197,1,1,1,1,1,1,1 +Mwanza,1,1.646695173,1,1,1,1,1,1,1 +Mzimba,1,1.462212301,1,1,1,1,1,1,1 +Mzuzu City,1,1.462212302,1,1,1,1,1,1,1 +Neno,1,2.133827415,1,1,1,1,1,1,1 +Nkhata Bay,1,2.260579269,1,1,1,1,1,1,1 +Nkhotakota,1,1.964020582,1,1,1,1,1,1,1 +Nsanje,1,2.028728453,1,1,1,1,1,1,1 +Ntcheu,1,1.544519745,1,1,1,1,1,1,1 +Ntchisi,1,1.885047321,1,1,1,1,1,1,1 +Phalombe,1,1.561895301,1,1,1,1,1,1,1 +Rumphi,1,1.643251104,1,1,1,1,1,1,1 +Salima,1,1.449091752,1,1,1,1,1,1,1 +Thyolo,1,1.360371849,1,1,1,1,1,1,1 +Zomba,1,1.10442287,1,1,1,1,1,1,1 +Zomba City,1,1.10442287,1,1,1,1,1,1,1 +Zomba Mental Hospital,1,0.85520169,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_CNP.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_CNP.csv new file mode 100644 index 0000000000..4577899122 --- /dev/null +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type/establishment_staffing_CNP.csv @@ -0,0 +1,10 @@ +Officer_Category,L0_factor,L1a_factor,L1b_factor,L2_factor,L3_factor,L4_factor,L5_factor +Clinical,1.536932742,1.536932742,1.536932742,1.536932742,1.536932742,1.536932742,1.536932742 +DCSA,1,1,1,1,1,1,1 +Dental,1,1,1,1,1,1,1 +Laboratory,1,1,1,1,1,1,1 +Mental,1,1,1,1,1,1,1 +Nursing_and_Midwifery,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535,1.455369535 +Nutrition,1,1,1,1,1,1,1 +Pharmacy,1.855698791,1.855698791,1.855698791,1.855698791,1.855698791,1.855698791,1.855698791 +Radiography,1,1,1,1,1,1,1 From 5787ab5b38c66a59263c68d34b4cea045324d8a4 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 11:29:19 +0100 Subject: [PATCH 31/52] add parameters and the function of HRH scaling by district and officer type --- .../ResourceFile_HealthSystem_parameters.csv | 1 + 
src/tlo/methods/healthsystem.py | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index c6bd6414e7..05f0b5df2f 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -26,3 +26,4 @@ year_use_funded_or_actual_staffing_switch,2100 cons_override_treatment_ids,[] cons_override_treatment_ids_prob_avail,1.0 clinic_configuration_name,Default +year_HR_scaling_by_district_and_officer_type,2100 diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index f8b0f55a03..709b67fb79 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -308,6 +308,26 @@ class HealthSystem(Module): "(factors informed by survey data); and, `custom` (user can freely set these factors as " "parameters in the analysis).", ), + "HR_scaling_by_district_and_officer_type_table": Parameter( + Types.DICT, + "Factors by which daily capabilities of difference cadres in different districts will be" + "scaled at the start of the year specified by year_HR_scaling_by_district_officer_type to simulate" + "(e.g., through catastrophic event disrupting delivery of services in particular district(s))." + "This is the import of a folder of csv resource files: keys are the file names and values are in the " + "csv files in the format of pd.DataFrames. Additional scenarios can be added by adding " + "csv files to this folder: the value of `HR_scaling_by_district_officer_type_mode` indicates which" + "csv file is used.", + ), + "year_HR_scaling_by_district_and_officer_type": Parameter( + Types.INT, + "Year in which scaling of daily capabilities by district and cadre will take place. 
" + "(The change happens on 1st January of that year.)", + ), + "HR_scaling_by_district_and_officer_type_mode": Parameter( + Types.STRING, + "Mode of scaling of daily capabilities by district and cadre. This corresponds to the name of the " + "worksheet in the file `ResourceFile_HR_scaling_by_district.xlsx`.", + ), "HR_scaling_by_district_table": Parameter( Types.DICT, "Factors by which daily capabilities in different districts will be" @@ -904,6 +924,13 @@ def initialise_simulation(self, sim): Date(self.parameters["year_HR_scaling_by_level_and_officer_type"], 1, 1), ) + # Schedule a one-off rescaling of _daily_capabilities broken down by district and officer type. + # This occurs on 1st January of the year specified in the parameters. + sim.schedule_event( + ConstantRescalingHRCapabilities(self), + Date(self.parameters["year_HR_scaling_by_district_and_officer_type"], 1, 1), + ) + # Schedule a one-off rescaling of _daily_capabilities broken down by district # This occurs on 1st January of the year specified in the parameters. 
sim.schedule_event( @@ -2983,6 +3010,37 @@ def apply(self, population): officer_type, f"L{level}_factor" ] +class RescaleHRCapabilities_ByDistrictAndOfficerType(Event, PopulationScopeEventMixin): + """This event exists to scale the daily capabilities, with a factor for each pair district and cadre.""" + + def __init__(self, module): + super().__init__(module) + + def apply(self, population): + # Get the set of scaling_factors that are specified by 'HR_scaling_by_district_and_officer_type_mode' + HR_scaling_factor_by_district_and_officer_type = ( + self.module.parameters["HR_scaling_by_district_and_officer_type_table"][ + self.module.parameters["HR_scaling_by_district_and_officer_type_mode"] + ] + .set_index("District") + .to_dict() + ) + + pattern = r"FacilityID_(\w+)_Officer_(\w+)" + for clinic, clinic_cl in self.module._daily_capabilities.items(): + for officer in clinic_cl.keys(): + matches = re.match(pattern, officer) + # Extract ID and officer type from + facility_id = int(matches.group(1)) + district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] + if ( + (district in HR_scaling_factor_by_district_and_officer_type) and + (officer in HR_scaling_factor_by_district_and_officer_type.columns) + ): + self.module._daily_capabilities[clinic][officer] *= ( + HR_scaling_factor_by_district_and_officer_type.loc[district, officer] + ) + class RescaleHRCapabilities_ByDistrict(Event, PopulationScopeEventMixin): """This event exists to scale the daily capabilities, with a factor for each district.""" From b8eee836f020848c216353e456f221e766a17ba9 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 11:41:51 +0100 Subject: [PATCH 32/52] add mode parameter in the hs parameter list --- resources/healthsystem/ResourceFile_HealthSystem_parameters.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv index 
05f0b5df2f..35ca5e32d1 100644 --- a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv +++ b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv @@ -27,3 +27,4 @@ cons_override_treatment_ids,[] cons_override_treatment_ids_prob_avail,1.0 clinic_configuration_name,Default year_HR_scaling_by_district_and_officer_type,2100 +HR_scaling_by_district_and_officer_type_mode,default From 86faac6aa8935cb7d1d9873c0914d49606f080e1 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 11:50:53 +0100 Subject: [PATCH 33/52] add more scenarios as designed --- .../nurses_scenario_analyses.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 4f0343514b..d48593c7ad 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -158,6 +158,72 @@ def _get_scenarios(self) -> Dict[str, Dict]: }, }, ), + + "More CNP staff / Default Healthsystem Function": + mix_scenarios( + self._default_of_all_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", + "year_HR_scaling_by_level_and_officer_type": 2027, + }, + }, + ), + + "More CNP staff / Improved Healthsystem Function": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", + "year_HR_scaling_by_level_and_officer_type": 2027, + }, + }, + ), + + "More Nurses by District / Default Healthsystem Function": + mix_scenarios( + self._default_of_all_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", + "year_HR_scaling_by_district_and_officer_type": 2027, + }, + }, + ), + + "More Nurses by District / Improved Healthsystem Function": + mix_scenarios( + 
self._default_of_all_max_healthsystem_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", + "year_HR_scaling_by_district_and_officer_type": 2027, + }, + }, + ), + + "More CNP staff by District / Default Healthsystem Function": + mix_scenarios( + self._default_of_all_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", + "year_HR_scaling_by_district_and_officer_type": 2027, + }, + }, + ), + + "More CNP staff by District / Improved Healthsystem Function": + mix_scenarios( + self._default_of_all_max_healthsystem_scenarios, + { + "HealthSystem": { + 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", + "year_HR_scaling_by_district_and_officer_type": 2027, + }, + }, + ), } From 1726853bd61f8ad8b7bfcfd435ea2bd6eaa8f3ae Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 12:00:35 +0100 Subject: [PATCH 34/52] correct typo --- src/tlo/methods/healthsystem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 709b67fb79..4c6be9bf0c 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -927,7 +927,7 @@ def initialise_simulation(self, sim): # Schedule a one-off rescaling of _daily_capabilities broken down by district and officer type. # This occurs on 1st January of the year specified in the parameters. 
sim.schedule_event( - ConstantRescalingHRCapabilities(self), + RescaleHRCapabilities_ByDistrictAndOfficerType(self), Date(self.parameters["year_HR_scaling_by_district_and_officer_type"], 1, 1), ) From e8a8cab034be37706a961892835f006af47052fe Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 12:23:37 +0100 Subject: [PATCH 35/52] read in missing parameter --- src/tlo/methods/healthsystem.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 4c6be9bf0c..bd3a2bd48e 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -714,6 +714,22 @@ def read_consumables(filename): f"{self.parameters['HR_scaling_by_level_and_officer_type_mode']}" ) + self.parameters["HR_scaling_by_district_and_officer_type_table"]: Dict = read_csv_files( + path_to_resourcefiles_for_healthsystem + / "human_resources" + / "scaling_capabilities" + / "ResourceFile_HR_scaling_by_district_and_officer_type", + files=None, # all sheets read in + ) + # Ensure the mode of HR scaling to be considered in included in the tables loaded + assert ( + self.parameters["HR_scaling_by_district_and_officer_type_mode"] + in self.parameters["HR_scaling_by_district_and_officer_type_table"] + ), ( + f"Value of `HR_scaling_by_district_and_officer_type_mode` not recognised: " + f"{self.parameters['HR_scaling_by_district_and_officer_type_mode']}" + ) + self.parameters["HR_scaling_by_district_table"]: Dict = read_csv_files( path_to_resourcefiles_for_healthsystem / "human_resources" From dc87136246913a6e100071c235fb97b935e6057b Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 16:53:15 +0100 Subject: [PATCH 36/52] update district names to be consistent with master facility list --- .../default.csv | 6 +++--- .../establishment_by_district_and_CNP.csv | 6 +++--- .../establishment_by_district_and_N.csv | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv index 9cdbd1be66..69d96f3109 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv @@ -2,9 +2,9 @@ Balaka,1,1,1,1,1,1,1,1,1 Blantyre,1,1,1,1,1,1,1,1,1 Blantyre City,1,1,1,1,1,1,1,1,1 -Central Hospitals (Central),1,1,1,1,1,1,1,1,1 -Central Hospitals (Northern),1,1,1,1,1,1,1,1,1 -Central Hospitals (Southern),1,1,1,1,1,1,1,1,1 +Referral Hospital_Central,1,1,1,1,1,1,1,1,1 +Referral Hospital_Northern,1,1,1,1,1,1,1,1,1 +Referral Hospital_Southern,1,1,1,1,1,1,1,1,1 Chikwawa,1,1,1,1,1,1,1,1,1 Chiradzulu,1,1,1,1,1,1,1,1,1 Chitipa,1,1,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv index 733889f0b5..0130a7d2c7 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv @@ -2,9 +2,9 @@ Balaka,1.174526999,1.169316612,2.052879982,1,1,1,1,1,1 Blantyre,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 Blantyre City,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 -Central Hospitals (Central),2.486954763,1.355424224,1.412864458,1,1,1,1,1,1 -Central Hospitals 
(Northern),2.197010075,1.070124208,0.897056799,1,1,1,1,1,1 -Central Hospitals (Southern),2.177651732,1.402012229,0.710875199,1,1,1,1,1,1 +Referral Hospitals_Central,2.486954763,1.355424224,1.412864458,1,1,1,1,1,1 +Referral Hospitals_Northern,2.197010075,1.070124208,0.897056799,1,1,1,1,1,1 +Referral Hospitals_Southern,2.177651732,1.402012229,0.710875199,1,1,1,1,1,1 Chikwawa,1.571206225,1.98442501,1.947217042,1,1,1,1,1,1 Chiradzulu,1.508760236,1.900692263,2.173637628,1,1,1,1,1,1 Chitipa,1.145249933,1.936513522,1.690607044,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv index c3ad875668..e75fb1ac8a 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv @@ -2,9 +2,9 @@ Balaka,1,1.169316612,1,1,1,1,1,1,1 Blantyre,1,0.89730864,1,1,1,1,1,1,1 Blantyre City,1,0.89730864,1,1,1,1,1,1,1 -Central Hospitals (Central),1,1.355424224,1,1,1,1,1,1,1 -Central Hospitals (Northern),1,1.070124208,1,1,1,1,1,1,1 -Central Hospitals (Southern),1,1.402012229,1,1,1,1,1,1,1 +Referral Hospital_Central,1,1.355424224,1,1,1,1,1,1,1 +Referral Hospital_Northern,1,1.070124208,1,1,1,1,1,1,1 +Referral Hospitals_Southern,1,1.402012229,1,1,1,1,1,1,1 Chikwawa,1,1.98442501,1,1,1,1,1,1,1 Chiradzulu,1,1.900692263,1,1,1,1,1,1,1 Chitipa,1,1.936513522,1,1,1,1,1,1,1 From 81f1d919d16093bf0eec3f5ab58369445f830767 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 20:48:44 +0100 Subject: [PATCH 37/52] fix the new hrh scaling function --- src/tlo/methods/healthsystem.py | 17 
++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index bd3a2bd48e..97ae0a8551 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -3039,7 +3039,6 @@ def apply(self, population): self.module.parameters["HR_scaling_by_district_and_officer_type_mode"] ] .set_index("District") - .to_dict() ) pattern = r"FacilityID_(\w+)_Officer_(\w+)" @@ -3048,13 +3047,21 @@ def apply(self, population): matches = re.match(pattern, officer) # Extract ID and officer type from facility_id = int(matches.group(1)) - district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] + officer_type = matches.group(2) + # Extract district + if facility_id.isin(range(128)): + district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] + elif facility_id.isin([128, 129, 130,131, 132]): + district = self.module._facility_by_facility_id[facility_id].name + else: + district = "" + # Scaling if ( - (district in HR_scaling_factor_by_district_and_officer_type) and - (officer in HR_scaling_factor_by_district_and_officer_type.columns) + (district in HR_scaling_factor_by_district_and_officer_type.index) and + (officer_type in HR_scaling_factor_by_district_and_officer_type.columns) ): self.module._daily_capabilities[clinic][officer] *= ( - HR_scaling_factor_by_district_and_officer_type.loc[district, officer] + HR_scaling_factor_by_district_and_officer_type.loc[district, officer_type] ) From 7dc174887ae7df7099d1ba1217e67b590fc11953 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 21:58:48 +0100 Subject: [PATCH 38/52] fix the new hrh scaling function --- src/tlo/methods/healthsystem.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 97ae0a8551..6c14ff7707 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py 
@@ -3049,12 +3049,12 @@ def apply(self, population): facility_id = int(matches.group(1)) officer_type = matches.group(2) # Extract district - if facility_id.isin(range(128)): + if facility_id in range(128): district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] - elif facility_id.isin([128, 129, 130,131, 132]): + elif facility_id in {128, 129, 130,131, 132}: district = self.module._facility_by_facility_id[facility_id].name else: - district = "" + district = "N/A" # Scaling if ( (district in HR_scaling_factor_by_district_and_officer_type.index) and From 863084b2c786d5060ffb11040f58cd39a89c999b Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 23:49:54 +0100 Subject: [PATCH 39/52] fix the referral hospital's district information, to be consistent with master facility list --- .../establishment_by_district_and_CNP.csv | 6 +++--- .../establishment_by_district_and_N.csv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv index 0130a7d2c7..359980d82c 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv @@ -2,9 +2,9 @@ Balaka,1.174526999,1.169316612,2.052879982,1,1,1,1,1,1 Blantyre,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 Blantyre City,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 -Referral Hospitals_Central,2.486954763,1.355424224,1.412864458,1,1,1,1,1,1 -Referral Hospitals_Northern,2.197010075,1.070124208,0.897056799,1,1,1,1,1,1 -Referral 
Hospitals_Southern,2.177651732,1.402012229,0.710875199,1,1,1,1,1,1 +Referral Hospital_Central,2.486954763,1.355424224,1.412864458,1,1,1,1,1,1 +Referral Hospital_Northern,2.197010075,1.070124208,0.897056799,1,1,1,1,1,1 +Referral Hospital_Southern,2.177651732,1.402012229,0.710875199,1,1,1,1,1,1 Chikwawa,1.571206225,1.98442501,1.947217042,1,1,1,1,1,1 Chiradzulu,1.508760236,1.900692263,2.173637628,1,1,1,1,1,1 Chitipa,1.145249933,1.936513522,1.690607044,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv index e75fb1ac8a..68691eafae 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv @@ -4,7 +4,7 @@ Blantyre,1,0.89730864,1,1,1,1,1,1,1 Blantyre City,1,0.89730864,1,1,1,1,1,1,1 Referral Hospital_Central,1,1.355424224,1,1,1,1,1,1,1 Referral Hospital_Northern,1,1.070124208,1,1,1,1,1,1,1 -Referral Hospitals_Southern,1,1.402012229,1,1,1,1,1,1,1 +Referral Hospital_Southern,1,1.402012229,1,1,1,1,1,1,1 Chikwawa,1,1.98442501,1,1,1,1,1,1,1 Chiradzulu,1,1.900692263,1,1,1,1,1,1,1 Chitipa,1,1.936513522,1,1,1,1,1,1,1 From 11fa3f1e88bf9156a9f06a0786852bb16416427c Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 23:53:51 +0100 Subject: [PATCH 40/52] fix typo --- src/tlo/methods/healthsystem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 6c14ff7707..3d48928cac 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -3051,7 
+3051,7 @@ def apply(self, population): # Extract district if facility_id in range(128): district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] - elif facility_id in {128, 129, 130,131, 132}: + elif facility_id in {128, 129, 130, 131, 132}: district = self.module._facility_by_facility_id[facility_id].name else: district = "N/A" From 940101ba921a277828a4fbbc571592a8454433e2 Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 23:56:54 +0100 Subject: [PATCH 41/52] local test run all scenarios --- .../nurses_scenario_analyses.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index d48593c7ad..4a3793fd86 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -35,11 +35,11 @@ def __init__(self): self.resources = get_root_path() / "resources" self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2035, 1, 1) - self.pop_size = 100000 + self.end_date = Date(2012, 1, 2) + self.pop_size = 30 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 5 + self.runs_per_draw = 1 def log_configuration(self): return { @@ -92,6 +92,7 @@ def _default_of_all_max_healthsystem_scenarios(self) -> Dict: def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. 
""" + year_of_hr_scaling = 2011 return { "Baseline Nurses / Default Healthsystem Function": mix_scenarios( @@ -99,7 +100,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "default", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -110,7 +111,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -121,7 +122,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -132,7 +133,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "default", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -143,7 +144,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -154,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -165,7 +166,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", - "year_HR_scaling_by_level_and_officer_type": 2027, + 
"year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -176,7 +177,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", - "year_HR_scaling_by_level_and_officer_type": 2027, + "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, }, }, ), @@ -187,7 +188,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", - "year_HR_scaling_by_district_and_officer_type": 2027, + "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, }, }, ), @@ -198,7 +199,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", - "year_HR_scaling_by_district_and_officer_type": 2027, + "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, }, }, ), @@ -209,7 +210,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", - "year_HR_scaling_by_district_and_officer_type": 2027, + "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, }, }, ), @@ -220,7 +221,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: { "HealthSystem": { 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", - "year_HR_scaling_by_district_and_officer_type": 2027, + "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, }, }, ), From 4d599530cb69bb7d15c3a1447afa1c6f4cb92f4f Mon Sep 17 00:00:00 2001 From: Bingling Date: Thu, 18 Jun 2026 23:58:36 +0100 Subject: [PATCH 42/52] small edits to add district info for facilities at level 3+ --- .../nurses_analyses/analysis_staff_num_more_districts.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py 
b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py index 077b57c5d5..8e1273f59e 100644 --- a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py +++ b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py @@ -310,6 +310,10 @@ def get_yearly_hr_count(_df): Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") ).set_index("Facility_ID") + # Temporarily add District info for facilities at levels 3+ + for fid in {128, 129, 130, 131, 132}: + mfl.loc[fid, "District"] = mfl.loc[fid, "Facility_Name"] + # Map facilities to districts districts = [ mfl.loc[fid, "District"] if fid in mfl.index else "Unknown" @@ -462,6 +466,9 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No param_names=param_names ) + # for local check + # staff_counts = staff_counts.reset_index() + # STEP 3: summarize runs print(type(staff_counts)) print(staff_counts.head()) From 3c4953267004645a085caaa2bc325756918aebab Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 19 Jun 2026 09:23:09 +0200 Subject: [PATCH 43/52] Add district loggers for deaths and dalys. 
Next is test file --- src/tlo/methods/healthburden.py | 79 ++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/src/tlo/methods/healthburden.py b/src/tlo/methods/healthburden.py index 54db8bf8fb..b19bfcaca5 100644 --- a/src/tlo/methods/healthburden.py +++ b/src/tlo/methods/healthburden.py @@ -89,10 +89,15 @@ def initialise_simulation(self, sim): age_index = self.sim.modules['Demography'].AGE_RANGE_CATEGORIES wealth_index = sim.modules['Lifestyle'].PROPERTIES['li_wealth'].categories year_index = list(range(self.sim.start_date.year, self.sim.end_date.year + 1)) + district_index = sim.modules['Demography'].PROPERTIES['district_of_residence'].categories self.multi_index_for_age_and_wealth_and_time = pd.MultiIndex.from_product( [sex_index, age_index, wealth_index, year_index], names=['sex', 'age_range', 'li_wealth', 'year']) + self.multi_index_for_age_and_wealth_and_time_and_region = pd.MultiIndex.from_product( + [sex_index, age_index, wealth_index, district_index, year_index], + names=['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']) + # Create the YLL and YLD storage data-frame (using sex/age_range/year multi-index) self.years_life_lost = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) @@ -168,6 +173,7 @@ def process_causes_of_dalys(self): 3) Output to the log mappers for causes of disability to the label """ ... 
+ # 1) Collect causes of death and disability that are reported by each disease module, # merging the gbd_causes declared for deaths or disabilities under the same label, @@ -192,13 +198,13 @@ def merge_dicts_of_causes(d1: Dict, d2: Dict) -> Dict: return merged_causes causes_of_death = collect_causes_from_disease_modules( - all_modules=self.sim.modules.values(), - collect='CAUSES_OF_DEATH', - acceptable_causes=self.sim.modules['Demography'].gbd_causes_of_death) + all_modules=self.sim.modules.values(), + collect='CAUSES_OF_DEATH', + acceptable_causes=self.sim.modules['Demography'].gbd_causes_of_death) causes_of_disability = collect_causes_from_disease_modules( - all_modules=self.sim.modules.values(), - collect='CAUSES_OF_DISABILITY', - acceptable_causes=set(self.parameters['gbd_causes_of_disability'])) + all_modules=self.sim.modules.values(), + collect='CAUSES_OF_DISABILITY', + acceptable_causes=set(self.parameters['gbd_causes_of_disability'])) causes_of_death_and_disability = merge_dicts_of_causes( causes_of_death, @@ -299,7 +305,7 @@ def get_daly_weight(self, sequlae_code): return daly_wt - def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, age_range=None, cause_of_death=None): + def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, age_range=None, district_of_residence=None, cause_of_death=None): """ Calculate and store the period for which there is 'years of lost life' when someone dies (assuming that the person has died on today's date in the simulation). 
@@ -313,11 +319,11 @@ def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, age_ def _format_for_multi_index(_yll: pd.Series): """Returns pd.Series which is the same as in the argument `_yll` except that the multi-index has been expanded to include sex and li_wealth and rearranged so that it matched the expected multi-index format - (sex/age_range/li_wealth/year).""" - return pd.DataFrame(_yll)\ - .assign(sex=sex, li_wealth=wealth)\ - .set_index(['sex', 'li_wealth'], append=True)\ - .reorder_levels(['sex', 'age_range', 'li_wealth', 'year'])[_yll.name] + (sex/age_range/li_wealth/district_of_residence/year).""" + return pd.DataFrame(_yll) \ + .assign(sex=sex, li_wealth=wealth, district_of_residence=district_of_residence) \ + .set_index(['sex', 'li_wealth', 'district_of_residence'], append=True) \ + .reorder_levels(['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year'])[_yll.name] assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time) @@ -345,19 +351,19 @@ def _format_for_multi_index(_yll: pd.Series): end_date=( date_of_birth + pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']) - pd.DateOffset(days=1)), date_of_birth=date_of_birth - ).groupby(level=1).sum()\ - .assign(year=date_of_death.year)\ - .set_index(['year'], append=True)['person_years']\ - .pipe(_format_for_multi_index) + ).groupby(level=1).sum() \ + .assign(year=date_of_death.year) \ + .set_index(['year'], append=True)['person_years'] \ + .pipe(_format_for_multi_index) # Get the years of live lost "stacked by age and time", whereby all the life-years lost up to the age_limit are # ascribed to the age of death and to the year of death. This is computed by collapsing the age-dimension of # `yll_stacked_by_time` onto the age(-range) of death. 
age_range_to_stack_to = age_range - yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum())\ - .assign(age_range=age_range_to_stack_to)\ - .set_index(['age_range'], append=True)['person_years']\ - .reorder_levels(['sex', 'age_range', 'li_wealth', 'year']) + yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum()) \ + .assign(age_range=age_range_to_stack_to) \ + .set_index(['age_range'], append=True)['person_years'] \ + .reorder_levels(['sex', 'age_range', 'li_wealth', 'year']) # Add the years-of-life-lost from this death to the overall YLL dataframe keeping track if cause_of_death not in self.years_life_lost.columns: @@ -418,11 +424,11 @@ def summarise_results_for_this_year(df, level=[0, 1]) -> pd.DataFrame: """Return pd.DataFrame that gives the summary of the `df` for the `year` by certain levels in the df's multi-index. The `level` argument gives a list of levels to use in `groupby`: e.g., level=[0,1] gives a summary of sex/age-group; and level=[2] gives a summary only by wealth category.""" - return df.loc[(slice(None), slice(None), slice(None), year)] \ - .groupby(level=level) \ - .sum() \ - .reset_index() \ - .assign(year=year) + return df.loc[(slice(None), slice(None), slice(None), slice(None), year)] \ + .groupby(level=level) \ + .sum() \ + .reset_index() \ + .assign(year=year) def log_df_line_by_line(key, description, df, force_cols=None) -> None: """Log each line of a dataframe to `logger.info`. Each row of the dataframe is one logged entry. 
@@ -533,10 +539,10 @@ def log_df_line_by_line(key, description, df, force_cols=None) -> None: def check_multi_index(self): """Check that the multi-index of the dataframes are as expected""" - assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_lived_with_disability.index.equals(self.multi_index_for_age_and_wealth_and_time) + assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_lived_with_disability.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) class Get_Current_DALYS(RegularEvent, PopulationScopeEventMixin): @@ -617,20 +623,20 @@ def apply(self, population): # 4) Summarise the results for this month wrt sex/age/wealth # - merge in age/wealth/sex information disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.merge( - df.loc[idx_alive, ['sex', 'li_wealth', 'age_range']], left_index=True, right_index=True, how='left') + df.loc[idx_alive, ['sex', 'li_wealth', 'district_of_residence', 'age_range']], left_index=True, right_index=True, how='left') # - sum of daly_weight, by sex/age/wealth disability_monthly_summary = pd.DataFrame( - disease_specific_daly_values_this_month.groupby(['sex', 'age_range', 'li_wealth']).sum().fillna(0)) + disease_specific_daly_values_this_month.groupby(['sex', 'age_range', 'district_of_residence', 'li_wealth']).sum().fillna(0)) # - add the year into the multi-index disability_monthly_summary['year'] = self.sim.date.year 
disability_monthly_summary.set_index('year', append=True, inplace=True) disability_monthly_summary = disability_monthly_summary.reorder_levels( - ['sex', 'age_range', 'li_wealth', 'year']) + ['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']) # 5) Add the monthly summary to the overall dataframe for YearsLivedWithDisability - dalys_to_add = disability_monthly_summary.sum().sum() # for checking + dalys_to_add = disability_monthly_summary.sum().sum() # for checking dalys_current = self.module.years_lived_with_disability.sum().sum() # for checking # (Nb. this will add columns that are not otherwise present and add values to columns where they are.) @@ -642,11 +648,11 @@ def apply(self, population): # Merge into a dataframe with the correct multi-index (the multi-index from combine is subtly different) self.module.years_lived_with_disability = \ - pd.DataFrame(index=self.module.multi_index_for_age_and_wealth_and_time)\ - .merge(combined, left_index=True, right_index=True, how='left') + pd.DataFrame(index=self.module.multi_index_for_age_and_wealth_and_time_and_region) \ + .merge(combined, left_index=True, right_index=True, how='left') # Check multi-index is in check and that the addition of DALYS has worked - assert self.module.years_lived_with_disability.index.equals(self.module.multi_index_for_age_and_wealth_and_time) + assert self.module.years_lived_with_disability.index.equals(self.module.multi_index_for_age_and_wealth_and_time_and_region) assert abs(self.module.years_lived_with_disability.sum().sum() - (dalys_to_add + dalys_current)) < 1e-5 self.module.check_multi_index() @@ -660,3 +666,4 @@ def __init__(self, module): def apply(self, population): self.module.write_to_log(year=self.sim.date.year) + From 7f8360761e5d70f9b10f797392dd89c42c9988b2 Mon Sep 17 00:00:00 2001 From: Bingling Date: Fri, 19 Jun 2026 09:43:14 +0100 Subject: [PATCH 44/52] fix blank space after Clinical cadre in the resource file --- .../establishment_by_district_and_CNP.csv | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv index 359980d82c..cf937973ce 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_CNP.csv @@ -1,4 +1,4 @@ -District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography +District,Clinical,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography Balaka,1.174526999,1.169316612,2.052879982,1,1,1,1,1,1 Blantyre,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 Blantyre City,0.759811383,0.89730864,1.578474944,1,1,1,1,1,1 From ca5ae9197e18a1e770a17585b725a8b9caf10280 Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 19 Jun 2026 10:46:02 +0200 Subject: [PATCH 45/52] Fix errors from test file --- src/tlo/methods/demography.py | 2 ++ src/tlo/methods/healthburden.py | 30 ++++++++++++++---------------- tests/test_healthburden.py | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 2acaad75eb..2c5d998fc4 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -555,6 +555,8 @@ def do_death(self, individual_id: int, cause: str, originating_module: Module): wealth=person['li_wealth'], date_of_birth=person['date_of_birth'], age_range=person['age_range'], + district_of_residence=person[ + 'district_of_residence'], cause_of_death=cause, ) diff --git a/src/tlo/methods/healthburden.py 
b/src/tlo/methods/healthburden.py index b19bfcaca5..bdb0349b0d 100644 --- a/src/tlo/methods/healthburden.py +++ b/src/tlo/methods/healthburden.py @@ -33,7 +33,7 @@ def __init__(self, name=None): super().__init__(name) # instance variables - self.multi_index_for_age_and_wealth_and_time = None + self.multi_index_for_age_and_wealth_and_time_and_region= None self.years_life_lost = None self.years_life_lost_stacked_time = None self.years_life_lost_stacked_age_and_time = None @@ -91,18 +91,15 @@ def initialise_simulation(self, sim): year_index = list(range(self.sim.start_date.year, self.sim.end_date.year + 1)) district_index = sim.modules['Demography'].PROPERTIES['district_of_residence'].categories - self.multi_index_for_age_and_wealth_and_time = pd.MultiIndex.from_product( - [sex_index, age_index, wealth_index, year_index], names=['sex', 'age_range', 'li_wealth', 'year']) - self.multi_index_for_age_and_wealth_and_time_and_region = pd.MultiIndex.from_product( [sex_index, age_index, wealth_index, district_index, year_index], names=['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']) # Create the YLL and YLD storage data-frame (using sex/age_range/year multi-index) - self.years_life_lost = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) - self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) - self.years_life_lost_stacked_age_and_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) - self.years_lived_with_disability = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time) + self.years_life_lost = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) + self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) + self.years_life_lost_stacked_age_and_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) + self.years_lived_with_disability = 
pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) # 2) Collect the module that will use this HealthBurden module self.recognised_modules_names = [ @@ -325,9 +322,9 @@ def _format_for_multi_index(_yll: pd.Series): .set_index(['sex', 'li_wealth', 'district_of_residence'], append=True) \ .reorder_levels(['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year'])[_yll.name] - assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time) + assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) # date from which years of life are lost date_of_death = self.sim.date @@ -360,10 +357,10 @@ def _format_for_multi_index(_yll: pd.Series): # ascribed to the age of death and to the year of death. This is computed by collapsing the age-dimension of # `yll_stacked_by_time` onto the age(-range) of death. 
age_range_to_stack_to = age_range - yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum()) \ + yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3, 4]).sum()) \ .assign(age_range=age_range_to_stack_to) \ .set_index(['age_range'], append=True)['person_years'] \ - .reorder_levels(['sex', 'age_range', 'li_wealth', 'year']) + .reorder_levels(['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']) # Add the years-of-life-lost from this death to the overall YLL dataframe keeping track if cause_of_death not in self.years_life_lost.columns: @@ -375,15 +372,16 @@ def _format_for_multi_index(_yll: pd.Series): # Add the life-years-lost from this death to the running total in LifeYearsLost dataframe self.years_life_lost[cause_of_death] = self.years_life_lost[cause_of_death].add( yll, fill_value=0) + self.years_life_lost_stacked_time[cause_of_death] = self.years_life_lost_stacked_time[cause_of_death].add( yll_stacked_by_time, fill_value=0) self.years_life_lost_stacked_age_and_time[cause_of_death] = \ self.years_life_lost_stacked_age_and_time[cause_of_death].add(yll_stacked_by_age_and_time, fill_value=0) # Check that the index of the YLL dataframe is not changed - assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time) + assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) def decompose_yll_by_age_and_time(self, start_date, end_date, date_of_birth): """ diff --git 
a/tests/test_healthburden.py b/tests/test_healthburden.py index 4b28b3fd85..c4fde3e772 100644 --- a/tests/test_healthburden.py +++ b/tests/test_healthburden.py @@ -386,9 +386,9 @@ def test_airthmetic_of_lifeyearslost(seed, tmpdir): assert yll.sum().sum() == approx(1.0) # check that age-range is correct (0.5 ly lost among 0-4 year-olds; 0.5 ly lost to 5-9 year-olds) - assert yll.loc[('F', '0-4', slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) - assert yll.loc[('F', '5-9', slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) - assert yll.loc[('F', ['0-4', '5-9'], slice(None), 2010)].sum().sum() == approx(1.0, abs=0.5 / DAYS_IN_YEAR) + assert yll.loc[('F', '0-4', slice(None), slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) + assert yll.loc[('F', '5-9', slice(None), slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) + assert yll.loc[('F', ['0-4', '5-9'], slice(None), slice(None), 2010)].sum().sum() == approx(1.0, abs=0.5 / DAYS_IN_YEAR) @pytest.mark.slow From 556c8eeef908261258c06e0f10c21df28291583f Mon Sep 17 00:00:00 2001 From: Bingling Date: Fri, 19 Jun 2026 10:55:34 +0100 Subject: [PATCH 46/52] save staff_counts for local check --- .../nurses_analyses/analysis_staff_num_more_districts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py index 8e1273f59e..0f4a520f48 100644 --- a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py +++ b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py @@ -466,8 +466,11 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No param_names=param_names ) - # for local check + # # for local check + # staff_counts = staff_counts + # staff_counts = staff_counts.xs(0, level="run", axis=1) # staff_counts = staff_counts.reset_index() + # 
staff_counts.to_csv(output_folder / "staff_counts_check.csv") # STEP 3: summarize runs print(type(staff_counts)) From 60e2a9d9004ad8dafbc8ab79b7c668167bb43dc2 Mon Sep 17 00:00:00 2001 From: Bingling Date: Fri, 19 Jun 2026 10:58:13 +0100 Subject: [PATCH 47/52] remove code for local check and keep useful edits --- .../nurses_analyses/analysis_staff_num_more_districts.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py index 0f4a520f48..d308279c6e 100644 --- a/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py +++ b/src/scripts/nurses_analyses/analysis_staff_num_more_districts.py @@ -310,7 +310,8 @@ def get_yearly_hr_count(_df): Path("./resources/healthsystem/organisation/ResourceFile_Master_Facilities_List.csv") ).set_index("Facility_ID") - # Temporarily add District info for facilities at levels 3+ + # Add district info for facilities at levels 3+ that have nan district info, + # to avoid these facilities being dropped for fid in {128, 129, 130, 131, 132}: mfl.loc[fid, "District"] = mfl.loc[fid, "Facility_Name"] @@ -466,12 +467,6 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No param_names=param_names ) - # # for local check - # staff_counts = staff_counts - # staff_counts = staff_counts.xs(0, level="run", axis=1) - # staff_counts = staff_counts.reset_index() - # staff_counts.to_csv(output_folder / "staff_counts_check.csv") - # STEP 3: summarize runs print(type(staff_counts)) print(staff_counts.head()) From d68bb5a4664804a1aa3e975fd7432db45d09af26 Mon Sep 17 00:00:00 2001 From: Bingling Date: Fri, 19 Jun 2026 11:01:37 +0100 Subject: [PATCH 48/52] recover full run settings --- src/scripts/nurses_analyses/nurses_scenario_analyses.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py 
b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 4a3793fd86..64219d4897 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -35,11 +35,11 @@ def __init__(self): self.resources = get_root_path() / "resources" self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = Date(2012, 1, 2) - self.pop_size = 30 + self.end_date = Date(2035, 1, 1) + self.pop_size = 100_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 10 def log_configuration(self): return { @@ -92,7 +92,7 @@ def _default_of_all_max_healthsystem_scenarios(self) -> Dict: def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. """ - year_of_hr_scaling = 2011 + year_of_hr_scaling = 2027 return { "Baseline Nurses / Default Healthsystem Function": mix_scenarios( From e3424a61cdf33d9c9f1b1293bf3f7785d95ec8ff Mon Sep 17 00:00:00 2001 From: Bingling Date: Mon, 22 Jun 2026 15:24:06 +0100 Subject: [PATCH 49/52] fix typo in resource file --- .../default.csv | 2 +- .../establishment_by_district_and_N.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv index 69d96f3109..c10498edb8 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/default.csv @@ -1,4 +1,4 @@ -District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography 
+District,Clinical,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography Balaka,1,1,1,1,1,1,1,1,1 Blantyre,1,1,1,1,1,1,1,1,1 Blantyre City,1,1,1,1,1,1,1,1,1 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv index 68691eafae..dcdf638838 100644 --- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_district_and_officer_type/establishment_by_district_and_N.csv @@ -1,4 +1,4 @@ -District,Clinical ,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography +District,Clinical,Nursing_and_Midwifery,Pharmacy,DCSA,Dental,Laboratory,Mental,Nutrition,Radiography Balaka,1,1.169316612,1,1,1,1,1,1,1 Blantyre,1,0.89730864,1,1,1,1,1,1,1 Blantyre City,1,0.89730864,1,1,1,1,1,1,1 From d6108a9ca07150b93a80a36ae5d645eccb6f8f68 Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 23 Jun 2026 14:43:43 +0200 Subject: [PATCH 50/52] district totals must be equal to national totals --- src/tlo/methods/healthburden.py | 2 +- tests/test_healthburden.py | 51 ++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/healthburden.py b/src/tlo/methods/healthburden.py index bdb0349b0d..98d0dffc2a 100644 --- a/src/tlo/methods/healthburden.py +++ b/src/tlo/methods/healthburden.py @@ -418,7 +418,7 @@ def write_to_log(self, year: int): if year in self._years_written_to_log: return # Skip if the year has already been logged. 
- def summarise_results_for_this_year(df, level=[0, 1]) -> pd.DataFrame: + def summarise_results_for_this_year(df, level=[0, 1, 2, 3]) -> pd.DataFrame: """Return pd.DataFrame that gives the summary of the `df` for the `year` by certain levels in the df's multi-index. The `level` argument gives a list of levels to use in `groupby`: e.g., level=[0,1] gives a summary of sex/age-group; and level=[2] gives a summary only by wealth category.""" diff --git a/tests/test_healthburden.py b/tests/test_healthburden.py index c4fde3e772..d5957f770c 100644 --- a/tests/test_healthburden.py +++ b/tests/test_healthburden.py @@ -77,17 +77,54 @@ def test_run_with_healthburden_with_dummy_diseases(tmpdir, seed): dalys = output['tlo.methods.healthburden']['dalys'] dalys = dalys.drop(columns=['date']) + # Columns that are not DALY causes + index_cols = ['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year'] + + # All remaining columns are DALY values + daly_cols = [c for c in dalys.columns if c not in index_cols] + + # Total national DALYs + national_totals = dalys[daly_cols].sum() + + # Total district DALYs + district_totals = (dalys.groupby('district_of_residence')[daly_cols].sum().sum()) + + pd.testing.assert_series_equal(national_totals.sort_index(), district_totals.sort_index(), check_dtype=False) + age_index = sim.modules['Demography'].AGE_RANGE_CATEGORIES sex_index = ['M', 'F'] + wealth_index = sim.modules['Lifestyle'].PROPERTIES['li_wealth'].categories + district_index = sim.modules['Demography'].PROPERTIES['district_of_residence'].categories year_index = list(range(start_date.year, end_date.year + 1)) - correct_multi_index = pd.MultiIndex.from_product([sex_index, age_index, year_index], - names=['sex', 'age_range', 'year']) - output_multi_index = dalys.set_index(['sex', 'age_range', 'year']).index + + correct_multi_index = pd.MultiIndex.from_product( + [sex_index, age_index, wealth_index, district_index, year_index], + names=['sex', 'age_range', 'li_wealth', 
'district_of_residence', 'year'] + ) + + output_multi_index = dalys.set_index( + ['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']).index pd.testing.assert_index_equal(output_multi_index, correct_multi_index, check_order=False) + # Check that YLL summed across districts equals the national YLL total + yll = output['tlo.methods.healthburden']['yll_by_causes_of_death'] + yll = yll.drop(columns=['date']) + + index_cols = ['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year'] + death_cols = [c for c in yll.columns if c not in index_cols] + + # National YLL total for each cause of death + national_deaths = yll[death_cols].sum() + + # YLL for each cause of death, summed over all districts + district_deaths = (yll.groupby('district_of_residence')[death_cols].sum().sum()) + + pd.testing.assert_series_equal(national_deaths.sort_index(), district_deaths.sort_index(), check_dtype=False) + # check that there is a column for each 'label' that is registered - assert set(dalys.set_index(['sex', 'age_range', 'year']).columns) == \ - {'Other', 'Mockitis_Disability_And_Death', 'ChronicSyndrome_Disability_And_Death'} + assert (set( + dalys.set_index(['sex', 'age_range', 'li_wealth', 'district_of_residence', 'year']).columns) == + {'Other', 'Mockitis_Disability_And_Death', 'ChronicSyndrome_Disability_And_Death'}) @pytest.mark.slow @@ -486,8 +523,8 @@ def apply(self, individual_id): & (yld.age_range == age_range_at_disability_onset) & (yld.sex == sex) ) - assert (yld.loc[marker_for_disability, 'cause_of_disability_A'] == daly_wt * 1.0).all() - assert (yld.loc[~marker_for_disability, 'cause_of_disability_A'] == 0.0).all() + assert (yld.loc[marker_for_disability, 'cause_of_disability_A'].sum() == approx(daly_wt * 1.0)) + assert (yld.loc[~marker_for_disability, 'cause_of_disability_A'].sum() == approx(0.0)) # For the Non-Stacked Results # -- YLL From 0dbb33b805c72809cf3423233b9d591f8c7c9fca Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 23 Jun 2026 15:55:11 +0200 Subject: [PATCH 51/52] change long lines
--- src/tlo/methods/healthburden.py | 24 ++++++++++++++++-------- tests/test_healthburden.py | 9 ++++++--- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/healthburden.py b/src/tlo/methods/healthburden.py index 98d0dffc2a..cc2ec65fb4 100644 --- a/src/tlo/methods/healthburden.py +++ b/src/tlo/methods/healthburden.py @@ -98,7 +98,8 @@ def initialise_simulation(self, sim): # Create the YLL and YLD storage data-frame (using sex/age_range/year multi-index) self.years_life_lost = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) - self.years_life_lost_stacked_age_and_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) + self.years_life_lost_stacked_age_and_time = ( + pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region)) self.years_lived_with_disability = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time_and_region) # 2) Collect the module that will use this HealthBurden module @@ -302,7 +303,8 @@ def get_daly_weight(self, sequlae_code): return daly_wt - def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, age_range=None, district_of_residence=None, cause_of_death=None): + def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, + age_range=None, district_of_residence=None, cause_of_death=None): """ Calculate and store the period for which there is 'years of lost life' when someone dies (assuming that the person has died on today's date in the simulation). 
@@ -324,7 +326,8 @@ def _format_for_multi_index(_yll: pd.Series): assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert (self.years_life_lost_stacked_age_and_time.index. + equals(self.multi_index_for_age_and_wealth_and_time_and_region)) # date from which years of life are lost date_of_death = self.sim.date @@ -381,7 +384,8 @@ def _format_for_multi_index(_yll: pd.Series): # Check that the index of the YLL dataframe is not changed assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert (self.years_life_lost_stacked_age_and_time.index. + equals(self.multi_index_for_age_and_wealth_and_time_and_region)) def decompose_yll_by_age_and_time(self, start_date, end_date, date_of_birth): """ @@ -539,7 +543,8 @@ def check_multi_index(self): """Check that the multi-index of the dataframes are as expected""" assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) - assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) + assert (self.years_life_lost_stacked_age_and_time.index. 
+ equals(self.multi_index_for_age_and_wealth_and_time_and_region)) assert self.years_lived_with_disability.index.equals(self.multi_index_for_age_and_wealth_and_time_and_region) @@ -621,11 +626,13 @@ def apply(self, population): # 4) Summarise the results for this month wrt sex/age/wealth # - merge in age/wealth/sex information disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.merge( - df.loc[idx_alive, ['sex', 'li_wealth', 'district_of_residence', 'age_range']], left_index=True, right_index=True, how='left') + df.loc[idx_alive, ['sex', 'li_wealth', 'district_of_residence', 'age_range']], + left_index=True, right_index=True, how='left') # - sum of daly_weight, by sex/age/wealth disability_monthly_summary = pd.DataFrame( - disease_specific_daly_values_this_month.groupby(['sex', 'age_range', 'district_of_residence', 'li_wealth']).sum().fillna(0)) + disease_specific_daly_values_this_month. + groupby(['sex', 'age_range', 'district_of_residence', 'li_wealth']).sum().fillna(0)) # - add the year into the multi-index disability_monthly_summary['year'] = self.sim.date.year @@ -650,7 +657,8 @@ def apply(self, population): .merge(combined, left_index=True, right_index=True, how='left') # Check multi-index is in check and that the addition of DALYS has worked - assert self.module.years_lived_with_disability.index.equals(self.module.multi_index_for_age_and_wealth_and_time_and_region) + assert (self.module.years_lived_with_disability.index. 
+ equals(self.module.multi_index_for_age_and_wealth_and_time_and_region)) assert abs(self.module.years_lived_with_disability.sum().sum() - (dalys_to_add + dalys_current)) < 1e-5 self.module.check_multi_index() diff --git a/tests/test_healthburden.py b/tests/test_healthburden.py index d5957f770c..799f1cedc6 100644 --- a/tests/test_healthburden.py +++ b/tests/test_healthburden.py @@ -423,9 +423,12 @@ def test_airthmetic_of_lifeyearslost(seed, tmpdir): assert yll.sum().sum() == approx(1.0) # check that age-range is correct (0.5 ly lost among 0-4 year-olds; 0.5 ly lost to 5-9 year-olds) - assert yll.loc[('F', '0-4', slice(None), slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) - assert yll.loc[('F', '5-9', slice(None), slice(None), 2010)].sum().sum() == approx(0.5, abs=2.0 / DAYS_IN_YEAR) - assert yll.loc[('F', ['0-4', '5-9'], slice(None), slice(None), 2010)].sum().sum() == approx(1.0, abs=0.5 / DAYS_IN_YEAR) + assert (yll.loc[('F', '0-4', slice(None), slice(None), 2010)].sum().sum() + == approx(0.5, abs=2.0 / DAYS_IN_YEAR)) + assert (yll.loc[('F', '5-9', slice(None), slice(None), 2010)].sum().sum() + == approx(0.5, abs=2.0 / DAYS_IN_YEAR)) + assert (yll.loc[('F', ['0-4', '5-9'], slice(None), slice(None), 2010)].sum().sum() + == approx(1.0, abs=0.5 / DAYS_IN_YEAR)) @pytest.mark.slow From b3c122a2610025dae938f810e53e0e2773756343 Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 23 Jun 2026 15:57:36 +0200 Subject: [PATCH 52/52] 1 scenario, 1 draw --- .../nurses_scenario_analyses.py | 240 +++++++++--------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/src/scripts/nurses_analyses/nurses_scenario_analyses.py b/src/scripts/nurses_analyses/nurses_scenario_analyses.py index 64219d4897..83d7280646 100644 --- a/src/scripts/nurses_analyses/nurses_scenario_analyses.py +++ b/src/scripts/nurses_analyses/nurses_scenario_analyses.py @@ -39,7 +39,7 @@ def __init__(self): self.pop_size = 100_000 self._scenarios = self._get_scenarios() 
self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 10 + self.runs_per_draw = 1 def log_configuration(self): return { @@ -94,115 +94,115 @@ def _get_scenarios(self) -> Dict[str, Dict]: """ year_of_hr_scaling = 2027 return { - "Baseline Nurses / Default Healthsystem Function": - mix_scenarios( - self._default_of_all_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "default", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "Fewer Nurses / Default Healthsystem Function": - mix_scenarios( - self._default_of_all_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More Nurses / Default Healthsystem Function": - mix_scenarios( - self._default_of_all_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "Baseline Nurses / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "default", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "Fewer Nurses / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More Nurses / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More CNP staff / Default Healthsystem 
Function": - mix_scenarios( - self._default_of_all_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More CNP staff / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", - "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More Nurses by District / Default Healthsystem Function": - mix_scenarios( - self._default_of_all_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", - "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, - }, - }, - ), - - "More Nurses by District / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", - "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, - }, - }, - ), + # "Baseline Nurses / Default Healthsystem Function": + # mix_scenarios( + # self._default_of_all_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "default", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "Fewer Nurses / Default Healthsystem Function": + # mix_scenarios( + # self._default_of_all_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "More Nurses / Default Healthsystem Function": + # mix_scenarios( + # self._default_of_all_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", + # 
"year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "Baseline Nurses / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "default", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "Fewer Nurses / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "worse_staffing_N", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "More Nurses / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_N", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "More CNP staff / Default Healthsystem Function": + # mix_scenarios( + # self._default_of_all_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "More CNP staff / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_level_and_officer_type_mode': "establishment_staffing_CNP", + # "year_HR_scaling_by_level_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), + # + # "More Nurses by District / Default Healthsystem Function": + # mix_scenarios( + # self._default_of_all_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", + # "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, + # }, + # }, + # 
), + # + # "More Nurses by District / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_N", + # "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), "More CNP staff by District / Default Healthsystem Function": mix_scenarios( @@ -215,16 +215,16 @@ def _get_scenarios(self) -> Dict[str, Dict]: }, ), - "More CNP staff by District / Improved Healthsystem Function": - mix_scenarios( - self._default_of_all_max_healthsystem_scenarios, - { - "HealthSystem": { - 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", - "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, - }, - }, - ), + # "More CNP staff by District / Improved Healthsystem Function": + # mix_scenarios( + # self._default_of_all_max_healthsystem_scenarios, + # { + # "HealthSystem": { + # 'HR_scaling_by_district_and_officer_type_mode': "establishment_by_district_and_CNP", + # "year_HR_scaling_by_district_and_officer_type": year_of_hr_scaling, + # }, + # }, + # ), }