From cd9c27ad1750a4ecb5b9aa3f7528988da2dda723 Mon Sep 17 00:00:00 2001 From: Nicholas Hathaway Date: Wed, 19 Nov 2025 20:58:28 -0500 Subject: [PATCH 1/3] added function to add travel info; --- src/pmotools/pmo_engine/pmo_processor.py | 109 ++++++++++- tests/test_pmo_engine/test_pmo_processor.py | 207 ++++++++++++++++++++ 2 files changed, 314 insertions(+), 2 deletions(-) diff --git a/src/pmotools/pmo_engine/pmo_processor.py b/src/pmotools/pmo_engine/pmo_processor.py index 44288e6..57dcf5c 100644 --- a/src/pmotools/pmo_engine/pmo_processor.py +++ b/src/pmotools/pmo_engine/pmo_processor.py @@ -3,8 +3,9 @@ import os from typing import NamedTuple import copy -import pandas +import re import pandas as pd + from collections import defaultdict from pmotools.pmo_engine.pmo_checker import PMOChecker @@ -407,7 +408,7 @@ def list_library_sample_names_per_specimen_name( pmodata, select_specimen_ids: list[int] = None, select_specimen_names: list[str] = None, - ) -> pandas.DataFrame: + ) -> pd.DataFrame: """ List all the library_sample_names per specimen_name :param pmodata: the PMO @@ -1481,3 +1482,107 @@ def extract_panels_insert_bed_loc( ) bed_loc_out[panel_id] = bed_loc_out_per_panel return bed_loc_out + + @staticmethod + def update_specimen_meta_with_traveler_info( + pmo, + traveler_info: pd.DataFrame, + specimen_name_col: str = "specimen_name", + travel_country_col: str = "travel_country", + travel_start_col: str = "travel_start_date", + travel_end_col: str = "travel_end_date", + bed_net_usage_col: str = None, + geo_admin1_col: str = None, + geo_admin2_col: str = None, + geo_admin3_col: str = None, + lat_lon_col: str = None, + replace_current_traveler_info: bool = False, + ): + """ + Update a PMO's specimen's metadata with travel info + :param pmo: the PMO to update, will directly modify this PMO + :param traveler_info: the traveler info + :param specimen_name_col: the specimen name column within the traveler input table + :param travel_country_col: the column name containing the traveled to country + :param travel_start_col: the column name containing the traveled start date, format YYYY-MM-DD or YYYY-MM + :param travel_end_col: the column name containing the traveled end date, format YYYY-MM-DD or YYYY-MM + :param bed_net_usage_col: a number between 0 - 1 for rough frequency of bednet usage while traveling + :param geo_admin1_col: the column name containing the traveled to country admin level 1 info + :param geo_admin2_col: the column name containing the traveled to country admin level 2 info + :param geo_admin3_col: the column name containing the traveled to country admin level 3 info + :param lat_lon_col: the latitude and longitude column name containing the region traveled to latitude and longitude + :param replace_current_traveler_info: whether to replace current travel info + :return: a reference to the updated PMO + """ + required_cols = [ + specimen_name_col, + travel_country_col, + travel_start_col, + travel_end_col, + ] + if bed_net_usage_col is not None: + required_cols.append(bed_net_usage_col) + if geo_admin1_col is not None: + required_cols.append(geo_admin1_col) + if geo_admin2_col is not None: + required_cols.append(geo_admin2_col) + if geo_admin3_col is not None: + required_cols.append(geo_admin3_col) + if lat_lon_col is not None: + required_cols.append(lat_lon_col) + + if not set(required_cols).issubset(traveler_info.columns): + raise Exception( + "missing traveler_info columns: " + ",".join(required_cols), + " columns in table: " + ",".join(traveler_info.columns), + ) + + specimen_names_in_pmo = set(PMOProcessor.get_specimen_names(pmo)) + specimen_names_in_traveler_info = set( + traveler_info[specimen_name_col].astype(str).tolist() + ) + + # check to see if provided traveler info for a specimen that cannot be found in PMO + missing_traveler_specs = specimen_names_in_traveler_info - specimen_names_in_pmo + + if missing_traveler_specs: + raise ValueError( + f"Provided traveler info for the following specimens but they are missing from the PMO: {sorted(missing_traveler_specs)}" + ) + # Matches YYYY-MM or YYYY-MM-DD + date_regex = re.compile(r"^\d{4}-\d{2}(-\d{2})?$") + traveler_info_records = traveler_info[required_cols].to_dict(orient="records") + spec_indexs = PMOProcessor.get_index_key_of_specimen_names(pmo) + + # prep traveler info lists, clear the list if we are replacing or start an empty list to append to if none exist already + for specimen_name in specimen_names_in_traveler_info: + if ( + replace_current_traveler_info + or "travel_out_six_month" + not in pmo["specimen_info"][spec_indexs[specimen_name]] + ): + pmo["specimen_info"][spec_indexs[specimen_name]][ + "travel_out_six_month" + ] = [] + + for travel_rec in traveler_info_records: + specimen_name = str(travel_rec[specimen_name_col]) + # Validate date formats + for date_col in (travel_start_col, travel_end_col): + val = travel_rec[date_col] + if pd.isna(val): + raise ValueError( + f"Missing required date value in column '{date_col}' for specimen '{specimen_name}'" + ) + val_str = str(val) + if not date_regex.match(val_str): + raise ValueError( + f"Invalid date format in '{date_col}' for specimen '{specimen_name}': '{val_str}'. " + f"Expected YYYY-MM or YYYY-MM-DD" + ) + # add in travel_rec + travel_rec.pop(specimen_name_col, None) + pmo["specimen_info"][spec_indexs[specimen_name]][ + "travel_out_six_month" + ].append(travel_rec) + return pmo diff --git a/tests/test_pmo_engine/test_pmo_processor.py b/tests/test_pmo_engine/test_pmo_processor.py index b6e0fba..e7fd833 100755 --- a/tests/test_pmo_engine/test_pmo_processor.py +++ b/tests/test_pmo_engine/test_pmo_processor.py @@ -873,6 +873,213 @@ def test_get_panel_names(self): names = PMOProcessor.get_panel_names(pmo_data_combined) self.assertEqual(["heomev1"], names) + def test_update_specimen_meta_with_traveler_info(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec1", "spec2"], + "travel_country": ["Kenya", "Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-04", "2024-02-15"], + "travel_end_date": ["2024-02", "2024-06", "2024-02-27"], + } + ) + + PMOProcessor.update_specimen_meta_with_traveler_info(test_pmo, traveler_info) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-02", + }, + { + "travel_country": "Kenya", + "travel_start_date": "2024-04", + "travel_end_date": "2024-06", + }, + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02-15", + "travel_end_date": "2024-02-27", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + + def test_update_specimen_meta_with_traveler_info_raises(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["24-01", "2024-02"], # BAD: "24-01" + "travel_end_date": ["2024-02-05", "2024-03"], + } + ) + + with self.assertRaises(ValueError): + PMOProcessor.update_specimen_meta_with_traveler_info( + test_pmo, traveler_info + ) + + def test_update_specimen_meta_with_traveler_info_with_optional(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-02"], + "travel_end_date": ["2024-01-20", "2024-02-15"], + "bed_net": [0.50, 0.0], + "admin1": ["Nairobi", "Dar es Salaam"], + "admin2": ["SubCounty1", "SubCounty2"], + "admin3": ["Ward1", "Ward2"], + "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], + } + ) + + PMOProcessor.update_specimen_meta_with_traveler_info( + test_pmo, + traveler_info, + bed_net_usage_col="bed_net", + geo_admin1_col="admin1", + geo_admin2_col="admin2", + geo_admin3_col="admin3", + lat_lon_col="latlon", + ) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-01-20", + "bed_net": 0.5, + "admin1": "Nairobi", + "admin2": "SubCounty1", + "admin3": "Ward1", + "latlon": "-1.2921,36.8219", + } + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02", + "travel_end_date": "2024-02-15", + "bed_net": 0.0, + "admin1": "Dar es Salaam", + "admin2": "SubCounty2", + "admin3": "Ward2", + "latlon": "-6.7924,39.2083", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + + def test_update_specimen_meta_with_traveler_info_with_optional_replace_old(self): + test_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-02", + }, + { + "travel_country": "Kenya", + "travel_start_date": "2024-04", + "travel_end_date": "2024-06", + }, + ], + }, + {"specimen_name": "spec2"}, + ], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-02"], + "travel_end_date": ["2024-01-20", "2024-02-15"], + "bed_net": [0.50, 0.0], + "admin1": ["Nairobi", "Dar es Salaam"], + "admin2": ["SubCounty1", "SubCounty2"], + "admin3": ["Ward1", "Ward2"], + "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], + } + ) + + PMOProcessor.update_specimen_meta_with_traveler_info( + test_pmo, + traveler_info, + bed_net_usage_col="bed_net", + geo_admin1_col="admin1", + geo_admin2_col="admin2", + geo_admin3_col="admin3", + lat_lon_col="latlon", + replace_current_traveler_info=True, + ) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-01-20", + "bed_net": 0.5, + "admin1": "Nairobi", + "admin2": "SubCounty1", + "admin3": "Ward1", + "latlon": "-1.2921,36.8219", + } + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02", + "travel_end_date": "2024-02-15", + "bed_net": 0.0, + "admin1": "Dar es Salaam", + "admin2": "SubCounty2", + "admin3": "Ward2", + "latlon": "-6.7924,39.2083", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + if __name__ == "__main__": unittest.main() From a20cc2d41e27cd4df775322df9308820eba5074b Mon Sep 17 00:00:00 2001 From: Nicholas Hathaway Date: Mon, 24 Nov 2025 13:44:49 -0800 Subject: [PATCH 2/3] moved to update traveler info to pmo_builder; change checking of date to use the datetime library; --- .../pmo_builder/functions_to_update_meta.py | 125 ++++++++++ src/pmotools/pmo_engine/pmo_processor.py | 105 -------- .../test_functions_to_update_meta.py | 228 ++++++++++++++++++ tests/test_pmo_engine/test_pmo_processor.py | 207 ---------------- 4 files changed, 353 insertions(+), 312 deletions(-) create mode 100644 src/pmotools/pmo_builder/functions_to_update_meta.py create mode 100755 tests/test_pmo_builder/test_functions_to_update_meta.py diff --git a/src/pmotools/pmo_builder/functions_to_update_meta.py b/src/pmotools/pmo_builder/functions_to_update_meta.py new file mode 100644 index 0000000..7291f09 --- /dev/null +++ b/src/pmotools/pmo_builder/functions_to_update_meta.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 + +import pandas as pd +from pmotools.pmo_engine.pmo_processor import PMOProcessor +from datetime import datetime + + +class PMOUpdater(object): + @staticmethod + def check_if_date_yyyy_mm_or_yyyy_mm_dd(date_string: str) -> bool: + """ + Checks if a string is in YYYY-MM or YYYY-MM-DD format. + :param date_string: the string to be checked + """ + try: + datetime.strptime(date_string, "%Y-%m-%d") + return True # Matches YYYY-MM-DD + except ValueError: + try: + datetime.strptime(date_string, "%Y-%m") + return True # Matches YYYY-MM + except ValueError: + return False # Does not match either format + + @staticmethod + def update_specimen_meta_with_traveler_info( + pmo, + traveler_info: pd.DataFrame, + specimen_name_col: str = "specimen_name", + travel_country_col: str = "travel_country", + travel_start_col: str = "travel_start_date", + travel_end_col: str = "travel_end_date", + bed_net_usage_col: str = None, + geo_admin1_col: str = None, + geo_admin2_col: str = None, + geo_admin3_col: str = None, + lat_lon_col: str = None, + replace_current_traveler_info: bool = False, + ): + """ + Update a PMO's specimen's metadata with travel info + :param pmo: the PMO to update, will directly modify this PMO + :param traveler_info: the traveler info + :param specimen_name_col: the specimen name column within the traveler input table + :param travel_country_col: the column name containing the traveled to country + :param travel_start_col: the column name containing the traveled start date, format YYYY-MM-DD or YYYY-MM + :param travel_end_col: the column name containing the traveled end date, format YYYY-MM-DD or YYYY-MM + :param bed_net_usage_col: (Optional) a number between 0 - 1 for rough frequency of bednet usage while traveling + :param geo_admin1_col: (Optional) the column name containing the traveled to country admin level 1 info + :param geo_admin2_col: (Optional) the column name containing the traveled to country admin level 2 info + :param geo_admin3_col: (Optional) the column name containing the traveled to country admin level 3 info + :param lat_lon_col: (Optional) the latitude and longitude column name containing the region traveled to latitude and longitude + :param replace_current_traveler_info: whether to replace current travel info + :return: a reference to the updated PMO + """ + required_cols = [ + specimen_name_col, + travel_country_col, + travel_start_col, + travel_end_col, + ] + if bed_net_usage_col is not None: + required_cols.append(bed_net_usage_col) + if geo_admin1_col is not None: + required_cols.append(geo_admin1_col) + if geo_admin2_col is not None: + required_cols.append(geo_admin2_col) + if geo_admin3_col is not None: + required_cols.append(geo_admin3_col) + if lat_lon_col is not None: + required_cols.append(lat_lon_col) + + if not set(required_cols).issubset(traveler_info.columns): + raise Exception( + "missing traveler_info columns: " + ",".join(required_cols), + " columns in table: " + ",".join(traveler_info.columns), + ) + + specimen_names_in_pmo = set(PMOProcessor.get_specimen_names(pmo)) + specimen_names_in_traveler_info = set( + traveler_info[specimen_name_col].astype(str).tolist() + ) + + # check to see if provided traveler info for a specimen that cannot be found in PMO + missing_traveler_specs = specimen_names_in_traveler_info - specimen_names_in_pmo + + if missing_traveler_specs: + raise ValueError( + f"Provided traveler info for the following specimens but they are missing from the PMO: {sorted(missing_traveler_specs)}" + ) + traveler_info_records = traveler_info[required_cols].to_dict(orient="records") + spec_indexs = PMOProcessor.get_index_key_of_specimen_names(pmo) + + # prep traveler info lists, clear the list if we are replacing or start an empty list to append to if none exist already + for specimen_name in specimen_names_in_traveler_info: + if ( + replace_current_traveler_info + or "travel_out_six_month" + not in pmo["specimen_info"][spec_indexs[specimen_name]] + ): + pmo["specimen_info"][spec_indexs[specimen_name]][ + "travel_out_six_month" + ] = [] + + for travel_rec in traveler_info_records: + specimen_name = str(travel_rec[specimen_name_col]) + # Validate date formats + for date_col in (travel_start_col, travel_end_col): + val = travel_rec[date_col] + if pd.isna(val): + raise ValueError( + f"Missing required date value in column '{date_col}' for specimen '{specimen_name}'" + ) + val_str = str(val) + if not PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd(val_str): + raise ValueError( + f"Invalid date format in '{date_col}' for specimen '{specimen_name}': '{val_str}'. " + f"Expected YYYY-MM or YYYY-MM-DD" + ) + # add in travel_rec + travel_rec.pop(specimen_name_col, None) + pmo["specimen_info"][spec_indexs[specimen_name]][ + "travel_out_six_month" + ].append(travel_rec) + return pmo diff --git a/src/pmotools/pmo_engine/pmo_processor.py b/src/pmotools/pmo_engine/pmo_processor.py index 57dcf5c..291f748 100644 --- a/src/pmotools/pmo_engine/pmo_processor.py +++ b/src/pmotools/pmo_engine/pmo_processor.py @@ -3,7 +3,6 @@ import os from typing import NamedTuple import copy -import re import pandas as pd from collections import defaultdict @@ -1482,107 +1481,3 @@ def extract_panels_insert_bed_loc( ) bed_loc_out[panel_id] = bed_loc_out_per_panel return bed_loc_out - - @staticmethod - def update_specimen_meta_with_traveler_info( - pmo, - traveler_info: pd.DataFrame, - specimen_name_col: str = "specimen_name", - travel_country_col: str = "travel_country", - travel_start_col: str = "travel_start_date", - travel_end_col: str = "travel_end_date", - bed_net_usage_col: str = None, - geo_admin1_col: str = None, - geo_admin2_col: str = None, - geo_admin3_col: str = None, - lat_lon_col: str = None, - replace_current_traveler_info: bool = False, - ): - """ - Update a PMO's specimen's metadata with travel info - :param pmo: the PMO to update, will directly modify this PMO - :param traveler_info: the traveler info - :param specimen_name_col: the specimen name column within the traveler input table - :param travel_country_col: the column name containing the traveled to country - :param travel_start_col: the column name containing the traveled start date, format YYYY-MM-DD or YYYY-MM - :param travel_end_col: the column name containing the traveled end date, format YYYY-MM-DD or YYYY-MM - :param bed_net_usage_col: a number between 0 - 1 for rough frequency of bednet usage while traveling - :param geo_admin1_col: the column name containing the traveled to country admin level 1 info - :param geo_admin2_col: the column name containing the traveled to country admin level 2 info - :param geo_admin3_col: the column name containing the traveled to country admin level 3 info - :param lat_lon_col: the latitude and longitude column name containing the region traveled to latitude and longitude - :param replace_current_traveler_info: whether to replace current travel info - :return: a reference to the updated PMO - """ - required_cols = [ - specimen_name_col, - travel_country_col, - travel_start_col, - travel_end_col, - ] - if bed_net_usage_col is not None: - required_cols.append(bed_net_usage_col) - if geo_admin1_col is not None: - required_cols.append(geo_admin1_col) - if geo_admin2_col is not None: - required_cols.append(geo_admin2_col) - if geo_admin3_col is not None: - required_cols.append(geo_admin3_col) - if lat_lon_col is not None: - required_cols.append(lat_lon_col) - - if not set(required_cols).issubset(traveler_info.columns): - raise Exception( - "missing traveler_info columns: " + ",".join(required_cols), - " columns in table: " + ",".join(traveler_info.columns), - ) - - specimen_names_in_pmo = set(PMOProcessor.get_specimen_names(pmo)) - specimen_names_in_traveler_info = set( - traveler_info[specimen_name_col].astype(str).tolist() - ) - - # check to see if provided traveler info for a specimen that cannot be found in PMO - missing_traveler_specs = specimen_names_in_traveler_info - specimen_names_in_pmo - - if missing_traveler_specs: - raise ValueError( - f"Provided traveler info for the following specimens but they are missing from the PMO: {sorted(missing_traveler_specs)}" - ) - # Matches YYYY-MM or YYYY-MM-DD - date_regex = re.compile(r"^\d{4}-\d{2}(-\d{2})?$") - traveler_info_records = traveler_info[required_cols].to_dict(orient="records") - spec_indexs = PMOProcessor.get_index_key_of_specimen_names(pmo) - - # prep traveler info lists, clear the list if we are replacing or start an empty list to append to if none exist already - for specimen_name in specimen_names_in_traveler_info: - if ( - replace_current_traveler_info - or "travel_out_six_month" - not in pmo["specimen_info"][spec_indexs[specimen_name]] - ): - pmo["specimen_info"][spec_indexs[specimen_name]][ - "travel_out_six_month" - ] = [] - - for travel_rec in traveler_info_records: - specimen_name = str(travel_rec[specimen_name_col]) - # Validate date formats - for date_col in (travel_start_col, travel_end_col): - val = travel_rec[date_col] - if pd.isna(val): - raise ValueError( - f"Missing required date value in column '{date_col}' for specimen '{specimen_name}'" - ) - val_str = str(val) - if not date_regex.match(val_str): - raise ValueError( - f"Invalid date format in '{date_col}' for specimen '{specimen_name}': '{val_str}'. " - f"Expected YYYY-MM or YYYY-MM-DD" - ) - # add in travel_rec - travel_rec.pop(specimen_name_col, None) - pmo["specimen_info"][spec_indexs[specimen_name]][ - "travel_out_six_month" - ].append(travel_rec) - return pmo diff --git a/tests/test_pmo_builder/test_functions_to_update_meta.py b/tests/test_pmo_builder/test_functions_to_update_meta.py new file mode 100755 index 0000000..138213b --- /dev/null +++ b/tests/test_pmo_builder/test_functions_to_update_meta.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 + +import os +import unittest +import pandas as pd +from pmotools.pmo_builder.functions_to_update_meta import PMOUpdater + + +class TestPMOUpdater(unittest.TestCase): + def setUp(self): + self.working_dir = os.path.dirname(os.path.abspath(__file__)) + + def test_check_if_date_yyyy_mm_or_yyyy_mm_dd(self): + self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023/11/24")) + self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("11-24-2023")) + self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("invalid-date")) + + self.assertTrue(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-11-24")) + self.assertTrue(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-11")) + + def test_update_specimen_meta_with_traveler_info(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec1", "spec2"], + "travel_country": ["Kenya", "Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-04", "2024-02-15"], + "travel_end_date": ["2024-02", "2024-06", "2024-02-27"], + } + ) + + PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-02", + }, + { + "travel_country": "Kenya", + "travel_start_date": "2024-04", + "travel_end_date": "2024-06", + }, + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02-15", + "travel_end_date": "2024-02-27", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + + def test_update_specimen_meta_with_traveler_info_raises(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["24-01", "2024-02"], # BAD: "24-01" + "travel_end_date": ["2024-02-05", "2024-03"], + } + ) + + with self.assertRaises(ValueError): + PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info) + + def test_update_specimen_meta_with_traveler_info_with_optional(self): + test_pmo = { + "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-02"], + "travel_end_date": ["2024-01-20", "2024-02-15"], + "bed_net": [0.50, 0.0], + "admin1": ["Nairobi", "Dar es Salaam"], + "admin2": ["SubCounty1", "SubCounty2"], + "admin3": ["Ward1", "Ward2"], + "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], + } + ) + + PMOUpdater.update_specimen_meta_with_traveler_info( + test_pmo, + traveler_info, + bed_net_usage_col="bed_net", + geo_admin1_col="admin1", + geo_admin2_col="admin2", + geo_admin3_col="admin3", + lat_lon_col="latlon", + ) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-01-20", + "bed_net": 0.5, + "admin1": "Nairobi", + "admin2": "SubCounty1", + "admin3": "Ward1", + "latlon": "-1.2921,36.8219", + } + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02", + "travel_end_date": "2024-02-15", + "bed_net": 0.0, + "admin1": "Dar es Salaam", + "admin2": "SubCounty2", + "admin3": "Ward2", + "latlon": "-6.7924,39.2083", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + + def test_update_specimen_meta_with_traveler_info_with_optional_replace_old(self): + test_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-02", + }, + { + "travel_country": "Kenya", + "travel_start_date": "2024-04", + "travel_end_date": "2024-06", + }, + ], + }, + {"specimen_name": "spec2"}, + ], + } + traveler_info = pd.DataFrame( + { + "specimen_name": ["spec1", "spec2"], + "travel_country": ["Kenya", "Tanzania"], + "travel_start_date": ["2024-01", "2024-02"], + "travel_end_date": ["2024-01-20", "2024-02-15"], + "bed_net": [0.50, 0.0], + "admin1": ["Nairobi", "Dar es Salaam"], + "admin2": ["SubCounty1", "SubCounty2"], + "admin3": ["Ward1", "Ward2"], + "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], + } + ) + + PMOUpdater.update_specimen_meta_with_traveler_info( + test_pmo, + traveler_info, + bed_net_usage_col="bed_net", + geo_admin1_col="admin1", + geo_admin2_col="admin2", + geo_admin3_col="admin3", + lat_lon_col="latlon", + replace_current_traveler_info=True, + ) + test_out_pmo = { + "specimen_info": [ + { + "specimen_name": "spec1", + "travel_out_six_month": [ + { + "travel_country": "Kenya", + "travel_start_date": "2024-01", + "travel_end_date": "2024-01-20", + "bed_net": 0.5, + "admin1": "Nairobi", + "admin2": "SubCounty1", + "admin3": "Ward1", + "latlon": "-1.2921,36.8219", + } + ], + }, + { + "specimen_name": "spec2", + "travel_out_six_month": [ + { + "travel_country": "Tanzania", + "travel_start_date": "2024-02", + "travel_end_date": "2024-02-15", + "bed_net": 0.0, + "admin1": "Dar es Salaam", + "admin2": "SubCounty2", + "admin3": "Ward2", + "latlon": "-6.7924,39.2083", + } + ], + }, + ] + } + self.assertEqual(test_out_pmo, test_pmo) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pmo_engine/test_pmo_processor.py b/tests/test_pmo_engine/test_pmo_processor.py index e7fd833..b6e0fba 100755 --- a/tests/test_pmo_engine/test_pmo_processor.py +++ b/tests/test_pmo_engine/test_pmo_processor.py @@ -873,213 +873,6 @@ def test_get_panel_names(self): names = PMOProcessor.get_panel_names(pmo_data_combined) self.assertEqual(["heomev1"], names) - def test_update_specimen_meta_with_traveler_info(self): - test_pmo = { - "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], - } - traveler_info = pd.DataFrame( - { - "specimen_name": ["spec1", "spec1", "spec2"], - "travel_country": ["Kenya", "Kenya", "Tanzania"], - "travel_start_date": ["2024-01", "2024-04", "2024-02-15"], - "travel_end_date": ["2024-02", "2024-06", "2024-02-27"], - } - ) - - PMOProcessor.update_specimen_meta_with_traveler_info(test_pmo, traveler_info) - test_out_pmo = { - "specimen_info": [ - { - "specimen_name": "spec1", - "travel_out_six_month": [ - { - "travel_country": "Kenya", - "travel_start_date": "2024-01", - "travel_end_date": "2024-02", - }, - { - "travel_country": "Kenya", - "travel_start_date": "2024-04", - "travel_end_date": "2024-06", - }, - ], - }, - { - "specimen_name": "spec2", - "travel_out_six_month": [ - { - "travel_country": "Tanzania", - "travel_start_date": "2024-02-15", - "travel_end_date": "2024-02-27", - } - ], - }, - ] - } - self.assertEqual(test_out_pmo, test_pmo) - - def test_update_specimen_meta_with_traveler_info_raises(self): - test_pmo = { - "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], - } - traveler_info = pd.DataFrame( - { - "specimen_name": ["spec1", "spec2"], - "travel_country": ["Kenya", "Tanzania"], - "travel_start_date": ["24-01", "2024-02"], # BAD: "24-01" - "travel_end_date": ["2024-02-05", "2024-03"], - } - ) - - with self.assertRaises(ValueError): - PMOProcessor.update_specimen_meta_with_traveler_info( - test_pmo, traveler_info - ) - - def test_update_specimen_meta_with_traveler_info_with_optional(self): - test_pmo = { - "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}], - } - traveler_info = pd.DataFrame( - { - "specimen_name": ["spec1", "spec2"], - "travel_country": ["Kenya", "Tanzania"], - "travel_start_date": ["2024-01", "2024-02"], - "travel_end_date": ["2024-01-20", "2024-02-15"], - "bed_net": [0.50, 0.0], - "admin1": ["Nairobi", "Dar es Salaam"], - "admin2": ["SubCounty1", "SubCounty2"], - "admin3": ["Ward1", "Ward2"], - "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], - } - ) - - PMOProcessor.update_specimen_meta_with_traveler_info( - test_pmo, - traveler_info, - bed_net_usage_col="bed_net", - geo_admin1_col="admin1", - geo_admin2_col="admin2", - geo_admin3_col="admin3", - lat_lon_col="latlon", - ) - test_out_pmo = { - "specimen_info": [ - { - "specimen_name": "spec1", - "travel_out_six_month": [ - { - "travel_country": "Kenya", - "travel_start_date": "2024-01", - "travel_end_date": "2024-01-20", - "bed_net": 0.5, - "admin1": "Nairobi", - "admin2": "SubCounty1", - "admin3": "Ward1", - "latlon": "-1.2921,36.8219", - } - ], - }, - { - "specimen_name": "spec2", - "travel_out_six_month": [ - { - "travel_country": "Tanzania", - "travel_start_date": "2024-02", - "travel_end_date": "2024-02-15", - "bed_net": 0.0, - "admin1": "Dar es Salaam", - "admin2": "SubCounty2", - "admin3": "Ward2", - "latlon": "-6.7924,39.2083", - } - ], - }, - ] - } - self.assertEqual(test_out_pmo, test_pmo) - - def test_update_specimen_meta_with_traveler_info_with_optional_replace_old(self): - test_pmo = { - "specimen_info": [ - { - "specimen_name": "spec1", - "travel_out_six_month": [ - { - "travel_country": "Kenya", - "travel_start_date": "2024-01", - "travel_end_date": "2024-02", - }, - { - "travel_country": "Kenya", - "travel_start_date": "2024-04", - "travel_end_date": "2024-06", - }, - ], - }, - {"specimen_name": "spec2"}, - ], - } - traveler_info = pd.DataFrame( - { - "specimen_name": ["spec1", "spec2"], - "travel_country": ["Kenya", "Tanzania"], - "travel_start_date": ["2024-01", "2024-02"], - "travel_end_date": ["2024-01-20", "2024-02-15"], - "bed_net": [0.50, 0.0], - "admin1": ["Nairobi", "Dar es Salaam"], - "admin2": ["SubCounty1", "SubCounty2"], - "admin3": ["Ward1", "Ward2"], - "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"], - } - ) - - PMOProcessor.update_specimen_meta_with_traveler_info( - test_pmo, - traveler_info, - bed_net_usage_col="bed_net", - geo_admin1_col="admin1", - geo_admin2_col="admin2", - geo_admin3_col="admin3", - lat_lon_col="latlon", - replace_current_traveler_info=True, - ) - test_out_pmo = { - "specimen_info": [ - { - "specimen_name": "spec1", - "travel_out_six_month": [ - { - "travel_country": "Kenya", - "travel_start_date": "2024-01", - "travel_end_date": "2024-01-20", - "bed_net": 0.5, - "admin1": "Nairobi", - "admin2": "SubCounty1", - "admin3": "Ward1", - "latlon": "-1.2921,36.8219", - } - ], - }, - { - "specimen_name": "spec2", - "travel_out_six_month": [ - { - "travel_country": "Tanzania", - "travel_start_date": "2024-02", - "travel_end_date": "2024-02-15", - "bed_net": 0.0, - "admin1": "Dar es Salaam", - "admin2": "SubCounty2", - "admin3": "Ward2", - "latlon": "-6.7924,39.2083", - } - ], - }, - ] - } - self.assertEqual(test_out_pmo, test_pmo) - if __name__ == "__main__": unittest.main() From 3794d237ec8d489319cffe8cccf7f5be073e7420 Mon Sep 17 00:00:00 2001 From: Nicholas Hathaway Date: Mon, 24 Nov 2025 19:18:31 -0800 Subject: [PATCH 3/3] changed name of file; --- .../pmo_builder/{functions_to_update_meta.py => pmo_updater.py} | 0 .../{test_functions_to_update_meta.py => test_pmo_updater.py} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/pmotools/pmo_builder/{functions_to_update_meta.py => pmo_updater.py} (100%) rename tests/test_pmo_builder/{test_functions_to_update_meta.py => test_pmo_updater.py} (99%) diff --git a/src/pmotools/pmo_builder/functions_to_update_meta.py b/src/pmotools/pmo_builder/pmo_updater.py similarity index 100% rename from src/pmotools/pmo_builder/functions_to_update_meta.py rename to src/pmotools/pmo_builder/pmo_updater.py diff --git a/tests/test_pmo_builder/test_functions_to_update_meta.py b/tests/test_pmo_builder/test_pmo_updater.py similarity index 99% rename from tests/test_pmo_builder/test_functions_to_update_meta.py rename to tests/test_pmo_builder/test_pmo_updater.py index 138213b..e707187 100755 --- a/tests/test_pmo_builder/test_functions_to_update_meta.py +++ b/tests/test_pmo_builder/test_pmo_updater.py @@ -3,7 +3,7 @@ import os import unittest import pandas as pd -from pmotools.pmo_builder.functions_to_update_meta import PMOUpdater +from pmotools.pmo_builder.pmo_updater import PMOUpdater class TestPMOUpdater(unittest.TestCase):