Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 76 additions & 14 deletions .github/workflows/build-map-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ on:
default: ''

jobs:
build:
download:
runs-on: ubuntu-latest
env:
PUSHOVER_API_KEY: ${{ secrets.PUSHOVER_API_KEY }}
PUSHOVER_USER_KEY: ${{ secrets.PUSHOVER_USER_KEY }}
outputs:
exit_code: ${{ steps.dl-shp.outputs.exit_code }}
shp_year: ${{ steps.info.outputs.shp_year }}
state_shp: ${{ steps.info.outputs.state_shp }}
county_shp: ${{ steps.info.outputs.county_shp }}

steps:
- name: Checkout
Expand All @@ -38,6 +40,38 @@ jobs:
run: |
python data-raw/scripts/shapefiles.py ${{ inputs.year }}

- name: Save shapefile info
id: info
run: |
echo "shp_year=${{ env.shp_year }}" >> "$GITHUB_OUTPUT"
echo "state_shp=${{ env.state_shp }}" >> "$GITHUB_OUTPUT"
echo "county_shp=${{ env.county_shp }}" >> "$GITHUB_OUTPUT"

- name: Upload shapefiles
if: steps.dl-shp.outputs.exit_code == '0'
uses: actions/upload-artifact@v4
with:
name: shapefiles
path: data-raw/shapefiles/${{ env.shp_year }}

process:
runs-on: ubuntu-latest
needs: download
if: needs.download.outputs.exit_code == '0'
outputs:
pr_url: ${{ steps.info.outputs.pr_url }}
pr_number: ${{ steps.info.outputs.pr_number }}

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Download shapefiles
uses: actions/download-artifact@v4
with:
name: shapefiles
path: data-raw/shapefiles/${{ needs.download.outputs.shp_year }}

- name: Setup R
uses: r-lib/actions/setup-r@v2

Expand All @@ -48,9 +82,9 @@ jobs:

- name: Modify shapefiles
env:
STATE_SHP: ${{ env.state_shp }}
COUNTY_SHP: ${{ env.county_shp }}
YEAR: ${{ env.shp_year }}
STATE_SHP: ${{ needs.download.outputs.state_shp }}
COUNTY_SHP: ${{ needs.download.outputs.county_shp }}
YEAR: ${{ needs.download.outputs.shp_year }}
run: |
input_dir <- file.path("data-raw", "shapefiles", Sys.getenv("YEAR"))
output_dir <- file.path("inst", "extdata", Sys.getenv("YEAR"))
Expand Down Expand Up @@ -80,7 +114,7 @@ jobs:
- name: Determine pull request parameters
id: pr-params
env:
YEAR: ${{ env.shp_year }}
YEAR: ${{ needs.download.outputs.shp_year }}
run: |
echo "branch_name=data-update/$YEAR" >> "$GITHUB_OUTPUT"
echo "pr_title=Add $YEAR map data" >> "$GITHUB_OUTPUT"
Expand All @@ -100,7 +134,7 @@ jobs:
token: ${{ secrets.BOT_PAT }}
author: ${{ secrets.BOT_USER }}
committer: ${{ secrets.BOT_USER }}
commit-message: "[automated] Add ${{ env.shp_year }} map data based on available shapefiles"
commit-message: "[automated] Add ${{ needs.download.outputs.shp_year }} map data based on available shapefiles"
branch: ${{ steps.pr-params.outputs.branch_name }}
title: ${{ steps.pr-params.outputs.pr_title }}
body: ${{ steps.pr-body.outputs.result }}
Expand All @@ -109,17 +143,45 @@ jobs:
labels: data update
delete-branch: true

- name: Save PR info
id: info
run: |
echo "pr_url=${{ steps.open-pr.outputs.pull-request-url }}" >> "$GITHUB_OUTPUT"
echo "pr_number=${{ steps.open-pr.outputs.pull-request-number }}" >> "$GITHUB_OUTPUT"

notify:
  # Sends exactly one Pushover notification describing how the run ended.
  # `if: always()` makes this job run even when `download` or `process`
  # failed or was skipped, so the outcome can be inspected via `needs`.
  runs-on: ubuntu-latest
  needs: [download, process]
  if: always()
  env:
    PUSHOVER_API_KEY: ${{ secrets.PUSHOVER_API_KEY }}
    PUSHOVER_USER_KEY: ${{ secrets.PUSHOVER_USER_KEY }}

  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip'

    - name: Install Python dependencies
      run: pip install -r data-raw/scripts/requirements.txt

    # Download succeeded and the PR was opened.
    - name: Send success notification
      if: needs.download.outputs.exit_code == '0' && needs.process.result == 'success'
      run: |
        python data-raw/scripts/pushover.py "✅ usmapdata has updated its data files, a PR review is needed: <a href=\"${{ needs.process.outputs.pr_url }}\">PR #${{ needs.process.outputs.pr_number }}</a>"

    # The download script reports 404 when the shapefiles are not published yet.
    - name: Send data not found notification
      if: needs.download.outputs.exit_code == '404'
      run: |
        python data-raw/scripts/pushover.py "⚠️ usmapdata failed to find map data files for ${{ needs.download.outputs.shp_year }}." "LOW"

    # Any other download outcome (including an empty exit code because the
    # job failed before the download step reported one).
    - name: Send failure notification
      if: needs.download.outputs.exit_code != '0' && needs.download.outputs.exit_code != '404'
      run: |
        python data-raw/scripts/pushover.py "❌ usmapdata failed to update map data files. (error: ${{ needs.download.outputs.exit_code }})" "LOW"

    # Download succeeded but the `process` job did not finish successfully.
    # Without this step that outcome would produce no notification at all,
    # because every other step keys off the download exit code.
    - name: Send processing failure notification
      if: needs.download.outputs.exit_code == '0' && needs.process.result != 'success'
      run: |
        python data-raw/scripts/pushover.py "❌ usmapdata failed to process map data files for ${{ needs.download.outputs.shp_year }}. (process job: ${{ needs.process.result }})" "LOW"

92 changes: 0 additions & 92 deletions data-raw/certs/www2-census-gov-chain.pem

This file was deleted.

1 change: 0 additions & 1 deletion data-raw/scripts/config.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[shapefiles]
url = https://www2.census.gov/geo/tiger/GENZ{year}/shp/cb_{year}_us_{entity}_{res}.zip
cert = www2-census-gov-chain.pem
current_year = 2024
entities = state,county
res = 20m
60 changes: 36 additions & 24 deletions data-raw/scripts/shapefiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,48 @@
import requests
import shutil
import sys
import tempfile
from zipfile import ZipFile

class DownloadError(Exception):
    """
    Raised when a file could not be downloaded.

    Parameters:
        message: Human-readable description of the failure.
        code: Status code describing the failure; stored on the instance
            as `code` so handlers can branch on it.
    """

    def __init__(self, message, code):
        # `code` is deliberately required: a missing code would end up as
        # None, and sys.exit(None) reports success.
        super().__init__(message)
        self.code = code

def _download_and_extract(file_url: str, extract_dir: str):
    """
    Downloads the zip archive at `file_url` and extracts it into `extract_dir`.

    Parameters:
        file_url: URL of the zip archive to download.
        extract_dir: Directory the archive contents are extracted into.

    Raises:
        DownloadError: If the response status code is not 200. The
            exception's `code` is the HTTP status code received.
    """
    response = requests.get(file_url, timeout=300)

    if response.status_code != 200:
        raise DownloadError(f"Failed to download {file_url}.", code=response.status_code)

    # The context manager owns the temporary file, so it is removed when the
    # block exits even if writing or extracting raises.
    with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
        tmp_filename = tmp_file.name
        tmp_file.write(response.content)
        tmp_file.flush()
        print(f"Files downloaded from {file_url} to {tmp_filename}.")

        # Hand ZipFile the open handle instead of reopening the file by name.
        tmp_file.seek(0)
        with ZipFile(tmp_file, "r") as z:
            z.extractall(extract_dir)
        print(f"{tmp_filename} extracted to {extract_dir}.")
def _exit(sys_code: int, gh_code: int=None):
    """
    Exits with the given code(s).

    Parameters:
        sys_code: The exit code to call sys.exit() with.
        gh_code (optional): The code to set in the GitHub output.
            If None, uses sys_code.

    If the GITHUB_OUTPUT environment variable is set, a line of the form
    `exit_code=<gh_code>` is appended to the file it names before exiting.
    """
    gh_code = sys_code if gh_code is None else gh_code

    # Record the code before exiting; nothing after sys.exit() runs.
    if (gh_env := os.getenv("GITHUB_OUTPUT")):
        with open(gh_env, "a") as f:
            f.write(f"exit_code={gh_code}\n")

    sys.exit(sys_code)

def download_shapefiles(selected_year=None):
"""
Expand All @@ -54,7 +66,6 @@ def download_shapefiles(selected_year=None):
SECTION = "shapefiles"

url_template = config.get(SECTION, "url")
cert_file = config.get(SECTION, "cert")
current_year = config.getint(SECTION, "current_year")
entities = config.get(SECTION, "entities").split(",")
res = config.get(SECTION, "res")
Expand All @@ -68,21 +79,18 @@ def download_shapefiles(selected_year=None):
with open(gh_env, "a") as f:
f.write(f"shp_year={year}\n")

# create cert file URL
cert_url = os.path.join(script_dir, "..", "certs", cert_file)

# create output directory
extract_dir = os.path.join(script_dir, "..", "shapefiles", str(year))

if os.path.exists(extract_dir):
shutil.rmtree(extract_dir)
shutil.os.makedirs(extract_dir)
os.makedirs(extract_dir)

try:
# attempt shapefile downloads
for entity in entities:
url = url_template.format(year=year, entity=entity, res=res)
_download_and_extract(url, extract_dir, cert_url)
_download_and_extract(url, extract_dir)

if (gh_env := os.getenv("GITHUB_ENV")):
with open(gh_env, "a") as f:
Expand All @@ -93,16 +101,20 @@ def download_shapefiles(selected_year=None):
config.set(SECTION, "current_year", f"{year}")
with open(config_file, "w") as f:
config.write(f)

_exit(0)
except DownloadError as e:
if e.code == 404: # i.e. shapefiles not found
print(f"The shapefiles for {year} were not found. Better luck next time!")
# "files not found" is not considered a system failure
_exit(sys_code=0, gh_code=404)
else: # other download errors
print(e)
_exit(e.code)

_failed(e.code)
except Exception as e:
print(e)
_failed(-1)
_exit(-1)


if __name__ == "__main__":
Expand Down
Loading