Merge pull request #74 from stac-labs/action_network_additions

scwikla · web-flow · commit aedb64629bcb · 2025-06-17T13:50:14.000-07:00
Action Network: add more useful functions
diff --git a/src/stac_utils/action_network.py b/src/stac_utils/action_network.py
@@ -2,6 +2,11 @@
 import json
 import requests
 from .http import HTTPClient
+import pandas as pd
+import logging
+
+# Module-level logger, named after this module via __name__
+logger = logging.getLogger(__name__)
 
 
 ROW_LIMIT = 10000
@@ -35,3 +40,142 @@ def check_response_for_rate_limit(
     ) -> [int, float, None]:
         """Checks ActionNetwork response for rate limit, always returns 1"""
         return 1
+
+    @staticmethod
+    def extract_action_network_id(identifiers: list[str]) -> str:
+        """
+        Return the Action Network ID found in a person's list of identifiers.
+
+        The first entry that starts with "action_network:" wins, and everything after
+        that prefix is returned.
+        Refer to https://actionnetwork.org/docs/v2/ for more information
+
+        :param identifiers: A list of Action Network identifier strings associated with a person.
+                            ``None``, an empty list and non-string entries are tolerated.
+        :return: The extracted Action Network ID, or an empty string if an Action Network ID is not found.
+        """
+        prefix = "action_network:"
+        # `or ()` lets a None/empty identifiers value fall through to the "" result
+        for identifier in identifiers or ():
+            # skip non-string entries instead of failing on `.startswith`
+            if isinstance(identifier, str) and identifier.startswith(prefix):
+                return identifier[len(prefix) :]
+        return ""
+
+    def create_people_dataframe(self, people_data: list[dict]) -> pd.DataFrame:
+        """
+        Given an iterable of Action Network people from the Action Network people endpoint, returns a pandas dataframe
+        with fields:
+            * action_network_id
+            * first_name
+            * last_name
+            * email_address
+            * phone
+            * zip5
+            * street_name
+            * city
+            * state
+        Only the first email address, phone number, postal address and address line of each person is used.
+        A contact list that is missing, None or empty yields an empty string for the fields derived from it.
+        Refer to https://actionnetwork.org/docs/v2/people for more information
+
+        :param people_data: A list of people dictionaries returned by the Action Network API, from the people endpoint
+        :return: Pandas dataframe with person fields; an empty input still yields the columns listed above
+
+        """
+        columns = [
+            "action_network_id",
+            "first_name",
+            "last_name",
+            "email_address",
+            "phone",
+            "zip5",
+            "street_name",
+            "city",
+            "state",
+        ]
+
+        def first_or(values, default):
+            # `.get(key, [{}])[0]` raises IndexError when the key is present but holds an
+            # empty list (and TypeError when it holds None), so fall back explicitly
+            head = values[0] if values else None
+            return head if head else default
+
+        rows = []
+        for person in people_data:
+            # common to all address fields
+            address = first_or(person.get("postal_addresses"), {})
+            email = first_or(person.get("email_addresses"), {})
+            phone = first_or(person.get("phone_numbers"), {})
+            rows.append(
+                {
+                    "action_network_id": self.extract_action_network_id(
+                        person.get("identifiers") or []
+                    ),
+                    "first_name": person.get("given_name", ""),
+                    "last_name": person.get("family_name", ""),
+                    "email_address": email.get("address", ""),
+                    "phone": phone.get("number", ""),
+                    # `or ""` guards a None postal_code before slicing down to five characters
+                    "zip5": (address.get("postal_code") or "")[:5],
+                    "street_name": first_or(address.get("address_lines"), ""),
+                    "city": address.get("locality", ""),
+                    "state": address.get("region", ""),
+                }
+            )
+        # explicit columns keep the schema stable even when `rows` is empty
+        return pd.DataFrame(rows, columns=columns)
+
+    def paginate_endpoint(
+        self, base_endpoint: str, embedded_key: str, max_pages: int = None, **kwargs
+    ) -> list[dict]:
+        """
+        Generic pagination helper for Action Network endpoints that return the "_embedded" resource, which all endpoints
+        that are collections of items (e.g. forms, events, submissions, etc.) do
+
+        Pages are requested starting at 1 until a page yields no items under ``embedded_key``
+        or ``max_pages`` pages have been fetched.
+
+        :param base_endpoint: the endpoint to paginate (e.g. "forms"); it may already carry a query string,
+                              in which case the page parameter is appended with "&"
+        :param embedded_key: the expected key inside the "_embedded" object
+                             (e.g. "osdi:submissions" for base_endpoint "forms/{form_id}/submissions"
+                              or   "osdi:forms" for base_endpoint "forms")
+        :param max_pages: optional parameter to limit the number of pages (can be used for testing);
+                          zero or a negative value fetches nothing
+        :param kwargs: forwarded unchanged to ``self.get`` on every page request
+        :return: list of embedded items from all pages
+        """
+        # a second "?" would corrupt an endpoint that already has query parameters
+        separator = "&" if "?" in base_endpoint else "?"
+        results = []
+        page = 1
+
+        while max_pages is None or page <= max_pages:
+            data = self.get(f"{base_endpoint}{separator}page={page}", **kwargs)
+
+            # tolerate an empty response or a None "_embedded" value
+            embedded = (data or {}).get("_embedded") or {}
+            items = embedded.get(embedded_key, [])
+            if not items:
+                # should flag end of pagination
+                # (message kept as a single pre-formatted string: the test suite asserts on it)
+                logger.debug(f"No items found at page {page} for key '{embedded_key}'")
+                break
+
+            results.extend(items)
+            page += 1
+
+        return results
+
+    def fetch_related_people(
+        self, resource: dict, person_link_keys: list[str] = None, **kwargs
+    ) -> list[dict]:
+        """
+        Follow the person links of a resource (i.e. a submission or signup) and return the
+        person records they point to, fetched one by one from the Action Network API.
+
+        Only resources that carry a person reference in their `_links` section produce results;
+        a link key that is absent, empty, or whose href does not contain "people/" is skipped.
+
+        Callers should handle lookup failures for action_network_ids that are not found, e.g.
+            * requests.exceptions.HTTPError: 404 Client Error
+
+        :param resource: the resource dict containing `_links`
+        :param person_link_keys: optional list of keys in `_links` that indicate person links;
+                                 defaults to ['osdi:person'] but can include others if relevant (i.e. osdi:creator)
+        :param kwargs: forwarded unchanged to ``self.get`` for every person request
+        :return: list of person dicts fetched, in the order of `person_link_keys`
+        """
+        keys = ["osdi:person"] if person_link_keys is None else person_link_keys
+        link_map = resource.get("_links", {})
+
+        fetched = []
+        for link_key in keys:
+            entry = link_map.get(link_key)
+            # one href is expected per link key
+            href = entry.get("href") if entry else None
+            if href and "people/" in href:
+                # the id is whatever follows the last "people/" in the URL
+                person_id = href.rsplit("people/", 1)[1]
+                # errors raised here are left for the caller to handle
+                fetched.append(self.get(f"people/{person_id}", **kwargs))
+
+        return fetched
diff --git a/src/tests/test_action_network.py b/src/tests/test_action_network.py
@@ -1,13 +1,14 @@
 import os
 import unittest
 from unittest.mock import MagicMock, patch
-
-from src.stac_utils.action_network import ActionNetworkClient
+import pandas as pd
+from src.stac_utils.action_network import ActionNetworkClient, logger
 
 
 class TestActionNetworkClient(unittest.TestCase):
     def setUp(self) -> None:
+        """Build a default client and keep a handle on the module logger for each test."""
         self.test_client = ActionNetworkClient()
+        self.test_logger = logger
 
     def test_init_env_keys(self):
         """Test that client initializes with environmental keys"""
@@ -42,3 +43,201 @@ def test_check_response_for_rate_limit(self):
         """Test that response has rate limit of 1"""
         test_client = ActionNetworkClient("foo")
         self.assertEqual(1, test_client.check_response_for_rate_limit(None))
+
+    def test_extract_action_network_id_valid(self):
+        """The value after the "action_network:" prefix is returned, ignoring other identifiers"""
+        extracted = self.test_client.extract_action_network_id(
+            ["not_an:123aabb", "action_network:foo12bar", "random_id:120930a"]
+        )
+        self.assertEqual(extracted, "foo12bar")
+
+    def test_extract_action_network_id_empty(self):
+        """An empty string comes back when no identifier carries the Action Network prefix"""
+        extracted = self.test_client.extract_action_network_id(
+            ["not_an:123aabb", "random_id:120930a"]
+        )
+        self.assertEqual(extracted, "")
+
+    def test_create_people_dataframe(self):
+        """Test that create_people_dataframe function correctly extracts/maps all fields from person record into
+        the resultant DataFrame."""
+        people_data = [
+            {
+                "identifiers": ["action_network:askjdaskdjfh12", "spam:fo1212o"],
+                "given_name": "fake_first_name",
+                "family_name": "fake_last_name",
+                "email_addresses": [{"address": "fake@example.com"}],
+                "phone_numbers": [{"number": "999-999-9999"}],
+                "postal_addresses": [
+                    {
+                        "postal_code": "90210-1234",
+                        "address_lines": ["999 Fake Street"],
+                        "locality": "Beverly Hills",
+                        "region": "CA",
+                    }
+                ],
+            }
+        ]
+
+        expected_df = pd.DataFrame(
+            [
+                {
+                    "action_network_id": "askjdaskdjfh12",
+                    "first_name": "fake_first_name",
+                    "last_name": "fake_last_name",
+                    "email_address": "fake@example.com",
+                    "phone": "999-999-9999",
+                    "zip5": "90210",
+                    "street_name": "999 Fake Street",
+                    "city": "Beverly Hills",
+                    "state": "CA",
+                }
+            ]
+        )
+
+        result_df = self.test_client.create_people_dataframe(people_data)
+        # assert_frame_equal checks values, column order, index and dtypes; comparing
+        # to_dict(orient="records") would ignore column order because dict equality does
+        pd.testing.assert_frame_equal(result_df, expected_df)
+
+    @patch.object(ActionNetworkClient, "get")
+    def test_paginate_endpoint_valid(self, mock_get):
+        """
+        Test to see if the paginate_endpoint function gathers results across multiple pages using the embedded_key
+        param, requests the pages in order, and stops at max_pages.
+        """
+        # three pages are available, but max_pages=2 must stop before the third
+        mock_get.side_effect = [
+            {"_embedded": {"osdi:forms": [{"val": 1}, {"val": 2}]}},
+            {"_embedded": {"osdi:forms": [{"val": 3}]}},
+            {"_embedded": {"osdi:forms": [{"val": 4}]}},
+        ]
+
+        results = self.test_client.paginate_endpoint(
+            "forms", embedded_key="osdi:forms", max_pages=2
+        )
+
+        # only the items of the first 2 pages are returned
+        self.assertEqual(results, [{"val": 1}, {"val": 2}, {"val": 3}])
+        # exactly two requests, for page 1 then page 2
+        self.assertEqual(
+            [request.args for request in mock_get.call_args_list],
+            [("forms?page=1",), ("forms?page=2",)],
+        )
+
+    @patch.object(logger, "debug")
+    @patch.object(ActionNetworkClient, "get")
+    def test_paginate_endpoint_empty(self, mock_get, mock_debug):
+        """
+        When a page's _embedded list is empty, pagination stops, the items gathered so far
+        are returned, and a debug message is logged for that page
+        """
+        mock_get.side_effect = [
+            {"_embedded": {"osdi:forms": [{"val": 1}, {"val": 2}]}},
+            {"_embedded": {"osdi:forms": [{"val": 3}]}},
+            {"_embedded": {"osdi:forms": []}},
+        ]
+        results = self.test_client.paginate_endpoint("forms", embedded_key="osdi:forms")
+
+        # items from the two non-empty pages survive the early stop
+        self.assertEqual(results, [{"val": 1}, {"val": 2}, {"val": 3}])
+        # the empty third page is requested once and nothing after it
+        self.assertEqual(mock_get.call_count, 3)
+        mock_debug.assert_called_once_with(
+            "No items found at page 3 for key 'osdi:forms'"
+        )
+
+    @patch.object(ActionNetworkClient, "get")
+    def test_fetch_related_people_valid(self, mock_get):
+        """
+        Tests if the fetch_related_people correctly fetches and returns related people records,
+        one request per link key, in the order the keys were given
+        """
+        # fake data for GET calls (not a complete person dict, which doesn't matter for this test)
+        person_1 = {
+            "identifiers": ["action_network:12ab234"],
+            "given_name": "Fake",
+            "family_name": "Name",
+        }
+
+        person_2 = {
+            "identifiers": ["action_network:182awe"],
+            "given_name": "Mock",
+            "family_name": "Test",
+        }
+
+        # setup mock_get to return these person dicts in order
+        mock_get.side_effect = [person_1, person_2]
+
+        # build resource with two person links so both keys get followed
+        resource = {
+            "_links": {
+                "osdi:person": {
+                    "href": "https://actionnetwork.org/api/v2/people/12ab234"
+                },
+                "osdi:creator": {
+                    "href": "https://actionnetwork.org/api/v2/people/182awe"
+                },
+            }
+        }
+
+        # get output of function
+        people = self.test_client.fetch_related_people(
+            resource, person_link_keys=["osdi:person", "osdi:creator"]
+        )
+
+        # exactly one GET per link, in key order (assert_any_call would check neither order nor count)
+        self.assertEqual(
+            [request.args for request in mock_get.call_args_list],
+            [("people/12ab234",), ("people/182awe",)],
+        )
+
+        # check that the output matches the mocked responses
+        self.assertEqual(people, [person_1, person_2])
+
+    @patch.object(ActionNetworkClient, "get")
+    def test_fetch_related_people_default_key(self, mock_get):
+        """
+        Tests if person_link_keys defaults to ["osdi:person"], ignoring other person links
+        """
+        # fake data for GET calls (not a complete person dict, which doesn't matter for this test)
+        person_1 = {
+            "identifiers": ["action_network:12ab234"],
+            "given_name": "Fake",
+            "family_name": "Name",
+        }
+
+        # only one response is queued, so a second request would fail the test
+        mock_get.side_effect = [person_1]
+
+        # the creator link is present but must not be followed by the default key list
+        resource = {
+            "_links": {
+                "osdi:person": {
+                    "href": "https://actionnetwork.org/api/v2/people/12ab234"
+                },
+                "osdi:creator": {
+                    "href": "https://actionnetwork.org/api/v2/people/182awe"
+                },
+            }
+        }
+
+        # get output of function, using default key
+        people = self.test_client.fetch_related_people(resource)
+
+        # a single GET, for the osdi:person link only
+        mock_get.assert_called_once_with("people/12ab234")
+
+        # check that the output matches the mocked responses
+        self.assertEqual(people, [person_1])
+
+    @patch.object(ActionNetworkClient, "get")
+    def test_fetch_related_people_link_info_none(self, mock_get):
+        """A link key whose value is None is skipped without issuing a request."""
+        resource_with_null_link = {"_links": {"osdi:person": None}}
+
+        fetched = self.test_client.fetch_related_people(
+            resource_with_null_link, person_link_keys=["osdi:person"]
+        )
+
+        self.assertEqual(fetched, [])
+        mock_get.assert_not_called()
+
+    @patch.object(ActionNetworkClient, "get")
+    def test_fetch_related_people_href_empty_or_missing_correct_endpoint(self, mock_get):
+        """Links whose href is None, or whose href does not contain 'people/', are skipped."""
+        non_person_href = "https://actionnetwork.org/api/v2/superfake/spam"
+        link_section = {
+            "osdi:person": {"href": None},
+            "osdi:creator": {"href": non_person_href},
+        }
+
+        fetched = self.test_client.fetch_related_people(
+            {"_links": link_section},
+            person_link_keys=["osdi:person", "osdi:creator"],
+        )
+
+        self.assertEqual(fetched, [])
+        mock_get.assert_not_called()