-
Notifications
You must be signed in to change notification settings - Fork 0
added function to add travel info; #58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| #!/usr/bin/env python3 | ||
|
|
||
| import pandas as pd | ||
| from pmotools.pmo_engine.pmo_processor import PMOProcessor | ||
| from datetime import datetime | ||
|
|
||
|
|
||
class PMOUpdater(object):
    """Static helpers that add metadata to an existing PMO in place."""

    @staticmethod
    def check_if_date_yyyy_mm_or_yyyy_mm_dd(date_string: str) -> bool:
        """
        Checks if a string is in YYYY-MM or YYYY-MM-DD format.

        The shape (4-digit year, 2-digit month, optional 2-digit day, all
        zero-padded ASCII digits) is checked first, because ``strptime`` on its
        own also accepts non-padded or space-padded fields such as "2023-1-5".
        ``strptime`` is then used to reject impossible dates such as "2023-02-30".

        :param date_string: the string to be checked
        :return: True if the string is a valid YYYY-MM or YYYY-MM-DD date,
            False otherwise (including for non-string input)
        """
        # function-scope import keeps this method self-contained
        import re

        if not isinstance(date_string, str):
            return False
        if re.fullmatch(r"[0-9]{4}-[0-9]{2}(-[0-9]{2})?", date_string) is None:
            return False
        # after the shape check the length alone tells the two formats apart
        date_format = "%Y-%m-%d" if len(date_string) == 10 else "%Y-%m"
        try:
            datetime.strptime(date_string, date_format)
        except ValueError:
            return False  # right shape but not a real calendar date
        return True

    @staticmethod
    def update_specimen_meta_with_traveler_info(
        pmo,
        traveler_info: pd.DataFrame,
        specimen_name_col: str = "specimen_name",
        travel_country_col: str = "travel_country",
        travel_start_col: str = "travel_start_date",
        travel_end_col: str = "travel_end_date",
        bed_net_usage_col: str = None,
        geo_admin1_col: str = None,
        geo_admin2_col: str = None,
        geo_admin3_col: str = None,
        lat_lon_col: str = None,
        replace_current_traveler_info: bool = False,
    ):
        """
        Update a PMO's specimen's metadata with travel info

        Each row of ``traveler_info`` becomes one record appended to the
        ``travel_out_six_month`` list of the matching entry in
        ``pmo["specimen_info"]``. Records are keyed by the column names of the
        input table (minus the specimen name column). All input is validated
        before the PMO is touched, so a failed call leaves the PMO unchanged.

        :param pmo: the PMO to update, will directly modify this PMO
        :param traveler_info: the traveler info
        :param specimen_name_col: the specimen name column within the traveler input table
        :param travel_country_col: the column name containing the traveled to country
        :param travel_start_col: the column name containing the traveled start date, format YYYY-MM-DD or YYYY-MM
        :param travel_end_col: the column name containing the traveled end date, format YYYY-MM-DD or YYYY-MM
        :param bed_net_usage_col: (Optional) a number between 0 - 1 for rough frequency of bednet usage while traveling
        :param geo_admin1_col: (Optional) the column name containing the traveled to country admin level 1 info
        :param geo_admin2_col: (Optional) the column name containing the traveled to country admin level 2 info
        :param geo_admin3_col: (Optional) the column name containing the traveled to country admin level 3 info
        :param lat_lon_col: (Optional) the latitude and longitude column name containing the region traveled to latitude and longitude
        :param replace_current_traveler_info: whether to replace current travel info
        :return: a reference to the updated PMO
        :raises ValueError: if a requested column is missing from the table, if
            the table names a specimen that is not in the PMO, or if a travel
            start/end date is missing or not in YYYY-MM / YYYY-MM-DD format
        """
        required_cols = [
            specimen_name_col,
            travel_country_col,
            travel_start_col,
            travel_end_col,
        ]
        optional_cols = (
            bed_net_usage_col,
            geo_admin1_col,
            geo_admin2_col,
            geo_admin3_col,
            lat_lon_col,
        )
        # an optional column becomes required as soon as the caller names it
        required_cols.extend(col for col in optional_cols if col is not None)

        missing_cols = [
            col for col in required_cols if col not in traveler_info.columns
        ]
        if missing_cols:
            # ValueError is a subclass of Exception, so existing handlers still catch it
            raise ValueError(
                "missing traveler_info columns: "
                + ",".join(map(str, missing_cols))
                + "; columns in table: "
                + ",".join(map(str, traveler_info.columns))
            )

        specimen_names_in_pmo = set(PMOProcessor.get_specimen_names(pmo))
        specimen_names_in_traveler_info = set(
            traveler_info[specimen_name_col].astype(str).tolist()
        )

        # check to see if provided traveler info for a specimen that cannot be found in PMO
        missing_traveler_specs = specimen_names_in_traveler_info - specimen_names_in_pmo
        if missing_traveler_specs:
            raise ValueError(
                f"Provided traveler info for the following specimens but they are missing from the PMO: {sorted(missing_traveler_specs)}"
            )

        traveler_info_records = traveler_info[required_cols].to_dict(orient="records")

        # validate every date up front so a bad row cannot leave the PMO
        # half-updated (or wipe existing travel info when replacing)
        for travel_rec in traveler_info_records:
            specimen_name = str(travel_rec[specimen_name_col])
            for date_col in (travel_start_col, travel_end_col):
                val = travel_rec[date_col]
                if pd.isna(val):
                    raise ValueError(
                        f"Missing required date value in column '{date_col}' for specimen '{specimen_name}'"
                    )
                val_str = str(val)
                if not PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd(val_str):
                    raise ValueError(
                        f"Invalid date format in '{date_col}' for specimen '{specimen_name}': '{val_str}'. "
                        f"Expected YYYY-MM or YYYY-MM-DD"
                    )

        spec_indexs = PMOProcessor.get_index_key_of_specimen_names(pmo)

        # prep traveler info lists, clear the list if we are replacing or start an empty list to append to if none exist already
        for specimen_name in specimen_names_in_traveler_info:
            specimen_meta = pmo["specimen_info"][spec_indexs[specimen_name]]
            if (
                replace_current_traveler_info
                or "travel_out_six_month" not in specimen_meta
            ):
                specimen_meta["travel_out_six_month"] = []

        # add in the records; the specimen name is dropped because the record
        # is stored under that specimen already
        for travel_rec in traveler_info_records:
            specimen_name = str(travel_rec[specimen_name_col])
            travel_rec.pop(specimen_name_col, None)
            pmo["specimen_info"][spec_indexs[specimen_name]][
                "travel_out_six_month"
            ].append(travel_rec)
        return pmo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,228 @@ | ||
| #!/usr/bin/env python3 | ||
|
|
||
| import os | ||
| import unittest | ||
| import pandas as pd | ||
| from pmotools.pmo_builder.pmo_updater import PMOUpdater | ||
|
|
||
|
|
||
class TestPMOUpdater(unittest.TestCase):
    """Unit tests for PMOUpdater's date check and traveler-info update."""

    def setUp(self):
        self.working_dir = os.path.dirname(os.path.abspath(__file__))

    @staticmethod
    def _bare_pmo():
        """Return a fresh minimal PMO with two specimens and no travel info."""
        return {
            "specimen_info": [{"specimen_name": "spec1"}, {"specimen_name": "spec2"}],
        }

    @staticmethod
    def _traveler_info_with_optional():
        """Return a fresh traveler table that includes every optional column."""
        return pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec2"],
                "travel_country": ["Kenya", "Tanzania"],
                "travel_start_date": ["2024-01", "2024-02"],
                "travel_end_date": ["2024-01-20", "2024-02-15"],
                "bed_net": [0.50, 0.0],
                "admin1": ["Nairobi", "Dar es Salaam"],
                "admin2": ["SubCounty1", "SubCounty2"],
                "admin3": ["Ward1", "Ward2"],
                "latlon": ["-1.2921,36.8219", "-6.7924,39.2083"],
            }
        )

    @staticmethod
    def _expected_pmo_with_optional():
        """Return the PMO expected after applying _traveler_info_with_optional()."""
        return {
            "specimen_info": [
                {
                    "specimen_name": "spec1",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-01",
                            "travel_end_date": "2024-01-20",
                            "bed_net": 0.5,
                            "admin1": "Nairobi",
                            "admin2": "SubCounty1",
                            "admin3": "Ward1",
                            "latlon": "-1.2921,36.8219",
                        }
                    ],
                },
                {
                    "specimen_name": "spec2",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Tanzania",
                            "travel_start_date": "2024-02",
                            "travel_end_date": "2024-02-15",
                            "bed_net": 0.0,
                            "admin1": "Dar es Salaam",
                            "admin2": "SubCounty2",
                            "admin3": "Ward2",
                            "latlon": "-6.7924,39.2083",
                        }
                    ],
                },
            ]
        }

    def test_check_if_date_yyyy_mm_or_yyyy_mm_dd(self):
        self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023/11/24"))
        self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("11-24-2023"))
        self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("invalid-date"))
        # right shape, but not real calendar dates
        self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-02-30"))
        self.assertFalse(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-13"))

        self.assertTrue(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-11-24"))
        self.assertTrue(PMOUpdater.check_if_date_yyyy_mm_or_yyyy_mm_dd("2023-11"))

    def test_update_specimen_meta_with_traveler_info(self):
        test_pmo = self._bare_pmo()
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec1", "spec2"],
                "travel_country": ["Kenya", "Kenya", "Tanzania"],
                "travel_start_date": ["2024-01", "2024-04", "2024-02-15"],
                "travel_end_date": ["2024-02", "2024-06", "2024-02-27"],
            }
        )

        returned = PMOUpdater.update_specimen_meta_with_traveler_info(
            test_pmo, traveler_info
        )
        test_out_pmo = {
            "specimen_info": [
                {
                    "specimen_name": "spec1",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-01",
                            "travel_end_date": "2024-02",
                        },
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-04",
                            "travel_end_date": "2024-06",
                        },
                    ],
                },
                {
                    "specimen_name": "spec2",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Tanzania",
                            "travel_start_date": "2024-02-15",
                            "travel_end_date": "2024-02-27",
                        }
                    ],
                },
            ]
        }
        self.assertEqual(test_out_pmo, test_pmo)
        # the PMO is modified in place and the same object is handed back
        self.assertIs(returned, test_pmo)

    def test_update_specimen_meta_with_traveler_info_raises(self):
        test_pmo = self._bare_pmo()
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec2"],
                "travel_country": ["Kenya", "Tanzania"],
                "travel_start_date": ["24-01", "2024-02"],  # BAD: "24-01"
                "travel_end_date": ["2024-02-05", "2024-03"],
            }
        )

        with self.assertRaises(ValueError):
            PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info)

    def test_update_specimen_meta_with_traveler_info_missing_date_raises(self):
        test_pmo = self._bare_pmo()
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec2"],
                "travel_country": ["Kenya", "Tanzania"],
                "travel_start_date": ["2024-01", None],  # BAD: no start date
                "travel_end_date": ["2024-02-05", "2024-03"],
            }
        )

        with self.assertRaises(ValueError):
            PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info)

    def test_update_specimen_meta_with_traveler_info_missing_column_raises(self):
        test_pmo = self._bare_pmo()
        # no travel_end_date column at all
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec2"],
                "travel_country": ["Kenya", "Tanzania"],
                "travel_start_date": ["2024-01", "2024-02"],
            }
        )

        with self.assertRaises(Exception):
            PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info)

    def test_update_specimen_meta_with_traveler_info_unknown_specimen_raises(self):
        test_pmo = self._bare_pmo()
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1", "spec3"],  # BAD: spec3 not in PMO
                "travel_country": ["Kenya", "Tanzania"],
                "travel_start_date": ["2024-01", "2024-02"],
                "travel_end_date": ["2024-02-05", "2024-03"],
            }
        )

        with self.assertRaises(ValueError):
            PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info)

    def test_update_specimen_meta_with_traveler_info_appends_by_default(self):
        test_pmo = {
            "specimen_info": [
                {
                    "specimen_name": "spec1",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-01",
                            "travel_end_date": "2024-02",
                        },
                    ],
                },
                {"specimen_name": "spec2"},
            ],
        }
        traveler_info = pd.DataFrame(
            {
                "specimen_name": ["spec1"],
                "travel_country": ["Uganda"],
                "travel_start_date": ["2024-04"],
                "travel_end_date": ["2024-06"],
            }
        )

        PMOUpdater.update_specimen_meta_with_traveler_info(test_pmo, traveler_info)
        test_out_pmo = {
            "specimen_info": [
                {
                    "specimen_name": "spec1",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-01",
                            "travel_end_date": "2024-02",
                        },
                        {
                            "travel_country": "Uganda",
                            "travel_start_date": "2024-04",
                            "travel_end_date": "2024-06",
                        },
                    ],
                },
                # spec2 has no row in the table, so it must be left untouched
                {"specimen_name": "spec2"},
            ],
        }
        self.assertEqual(test_out_pmo, test_pmo)

    def test_update_specimen_meta_with_traveler_info_with_optional(self):
        test_pmo = self._bare_pmo()

        PMOUpdater.update_specimen_meta_with_traveler_info(
            test_pmo,
            self._traveler_info_with_optional(),
            bed_net_usage_col="bed_net",
            geo_admin1_col="admin1",
            geo_admin2_col="admin2",
            geo_admin3_col="admin3",
            lat_lon_col="latlon",
        )
        self.assertEqual(self._expected_pmo_with_optional(), test_pmo)

    def test_update_specimen_meta_with_traveler_info_with_optional_replace_old(self):
        test_pmo = {
            "specimen_info": [
                {
                    "specimen_name": "spec1",
                    "travel_out_six_month": [
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-01",
                            "travel_end_date": "2024-02",
                        },
                        {
                            "travel_country": "Kenya",
                            "travel_start_date": "2024-04",
                            "travel_end_date": "2024-06",
                        },
                    ],
                },
                {"specimen_name": "spec2"},
            ],
        }

        PMOUpdater.update_specimen_meta_with_traveler_info(
            test_pmo,
            self._traveler_info_with_optional(),
            bed_net_usage_col="bed_net",
            geo_admin1_col="admin1",
            geo_admin2_col="admin2",
            geo_admin3_col="admin3",
            lat_lon_col="latlon",
            replace_current_traveler_info=True,
        )
        # the two pre-existing spec1 records are gone, only the new one remains
        self.assertEqual(self._expected_pmo_with_optional(), test_pmo)


if __name__ == "__main__":
    unittest.main()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.