Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 60 additions & 17 deletions .github/workflows/review-existing-content.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,29 @@ name: "Scheduled jobs: Review existing content (dispatcher)"
# picks the day's articles by weighted fair queuing — score = importance
# (strategic tier x traffic) x staleness (days since the page's effective last
# review) — and this workflow then fans out one
# content-review-article.yml run per selected article. Each of those worker
# runs reviews exactly one article, opens one ready PR, and force-dispatches
# claude-code-review.yml over it. Humans merge; nothing here is auto-merged.
# content-review-article.yml run per selected article. Each worker reviews
# exactly one article and, if it finds a fix, opens a draft PR; the re-lint
# gate promotes it to ready (firing the normal triage -> review chain) and
# arms GitHub auto-merge. master requires an approving review + the build
# check, so a human approval is still required to merge — auto-merge only
# removes the manual merge click after approval.
#
# The whole job is inert until the CONTENT_REVIEW_ENABLED repo variable is
# 'true' — scheduled workflows only run from the default branch, so this
# can merge ahead of being switched on. workflow_dispatch always runs
# (that's the testing path; see the dry_run / count / paths inputs).
#
# CRON IS DISABLED for now: the dispatcher is MANUAL-ONLY during rollout.
# Re-enable the schedule below when the per-article worker has proven out.
# SINGLE SWITCH (default-on) — the CONTENT_REVIEW_COUNT repo variable is both
# the on/off and the cadence knob:
# unset -> on, 3 articles/run (the default)
# '0' -> off: the review job is skipped (no review runner, no API spend); only the lightweight holiday-check job still runs
# 'N' -> on, N articles/run
# Set it from Settings -> Variables to retune or pause with no PR. The job gate
# below is a plain string compare against '0' (the hard off); the selector also
# no-ops cleanly on any count<=0, so a stray '00'/'-1' still does no work.
# workflow_dispatch always runs regardless (the testing path; see the
# dry_run / count / paths inputs).

on:
# Re-enable when ready (see note above):
# schedule:
# # Weekdays at 2:00PM UTC (one hour before check-links, so the two
# # bot-PR producers don't contend).
# - cron: '0 14 * * 1-5'
schedule:
# Weekdays at 2:00PM UTC (one hour before check-links, so the two
# bot-PR producers don't contend).
- cron: '0 14 * * 1-5'
workflow_dispatch:
inputs:
count:
Expand All @@ -48,8 +53,42 @@ permissions:
actions: write # Required to dispatch content-review-article.yml per article

jobs:
# Optional: skip the scheduled run on company holidays. Reads a BambooHR
# "Company Holidays" ICS feed from the BAMBOOHR_HOLIDAY_ICS_URL repo VARIABLE
# (a per-user feed token, treated as a temporary convenience — not a permanent
# shared resource). FAILS OPEN at every step: variable unset, feed 404/fetch
# failure, or parse error all yield is_holiday=false, so the dispatcher runs —
# a missing or stale feed never silently halts the pipeline. The URL is never
# echoed, so the token stays out of the logs. (Manual workflow_dispatch ignores
# this; see the `review` gate.) Always runs so `review`'s `needs` is satisfiable.
holiday-check:
  name: Company-holiday check
  runs-on: ubuntu-latest
  # `review` waits on this job via `needs`, so bound it: a hung checkout or
  # runner must not stall the dispatcher for the default job time limit.
  timeout-minutes: 5
  outputs:
    # 'true' only when is-holiday.py exits 0; every other path writes 'false'.
    is_holiday: ${{ steps.check.outputs.is_holiday }}
  steps:
    # The check step runs scripts/content-review/is-holiday.py from the repo.
    - uses: actions/checkout@v4
    - id: check
      env:
        # Passed through the environment and never echoed by the script below.
        ICS_URL: ${{ vars.BAMBOOHR_HOLIDAY_ICS_URL }}
      run: |
        if [ -z "$ICS_URL" ]; then
          echo "BAMBOOHR_HOLIDAY_ICS_URL not set; not skipping for holidays"
          echo "is_holiday=false" >> "$GITHUB_OUTPUT"; exit 0
        fi
        if ! curl -fsSL --max-time 20 "$ICS_URL" -o holidays.ics; then
          echo "::warning::holiday feed unavailable (404/fetch failure); failing open (dispatcher will run)"
          echo "is_holiday=false" >> "$GITHUB_OUTPUT"; exit 0
        fi
        if python3 scripts/content-review/is-holiday.py --ics holidays.ics --tz America/Chicago; then
          echo "is_holiday=true" >> "$GITHUB_OUTPUT"
        else
          echo "is_holiday=false" >> "$GITHUB_OUTPUT"
        fi

review:
name: Review existing content
needs: holiday-check
runs-on: ubuntu-latest
# PULUMI_STACK_NAME is an environment-scoped variable, so the job must
# select the environment to resolve it — otherwise `Resolve ledger bucket`
Expand All @@ -58,7 +97,9 @@ jobs:
# Hard cost ceiling: selection + up to `count` article reviews fit well
# inside this; a hung run dies rather than burning API budget.
timeout-minutes: 60
if: vars.CONTENT_REVIEW_ENABLED == 'true' || github.event_name == 'workflow_dispatch'
if: >-
github.event_name == 'workflow_dispatch' ||
(vars.CONTENT_REVIEW_COUNT != '0' && needs.holiday-check.outputs.is_holiday != 'true')
steps:
# ESC runs before checkout so the bot token can authenticate the
# checkout — the PRs opened by claude-code-action later then go out
Expand Down Expand Up @@ -185,7 +226,9 @@ jobs:
env:
GH_TOKEN: ${{ steps.esc-secrets.outputs.PULUMI_BOT_TOKEN }}
run: |
ARGS=(--count "${{ github.event.inputs.count || '3' }}" --out .content-review-queue.json)
# Count precedence: a manual workflow_dispatch input wins; otherwise the
# scheduled run uses the CONTENT_REVIEW_COUNT repo variable; otherwise 3.
ARGS=(--count "${{ github.event.inputs.count || vars.CONTENT_REVIEW_COUNT || '3' }}" --out .content-review-queue.json)
if [ -d .ledger-cache ]; then
ARGS+=(--ledger-dir .ledger-cache)
fi
Expand Down
189 changes: 189 additions & 0 deletions scripts/content-review/is-holiday.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""Decide whether a date is a company holiday, per an iCalendar (ICS) feed.

The content-review dispatcher uses this to skip its scheduled run on company
holidays. It reads an ICS feed (the Pulumi BambooHR "Company Holidays" feed) and
exits 0 when the target date — today in --tz by default — falls on a holiday,
and 1 otherwise. The dispatcher runs `if this exits 1`, so:

exit 0 -> today IS a holiday -> dispatcher skips
exit 1 -> not a holiday/error -> dispatcher runs

Errors FAIL OPEN (treated as "not a holiday", exit 1) so a calendar fetch or
parse hiccup never silently halts the pipeline — at worst it runs on a holiday
and opens PRs nobody reads until the next workday.

Only all-day VEVENTs whose CATEGORIES contains --category (default
"Company Holidays", case-insensitive) count. So even if the feed is ever
repointed at a broader calendar that also carries individual time off, this only
ever skips on real company holidays, never on someone's PTO. Pass --any-category
to match every event regardless.

All-day events use `VALUE=DATE` with an EXCLUSIVE DTEND (the morning after), so a
single-day holiday is DTSTART=D, DTEND=D+1, and the match is DTSTART <= day < DTEND.

Usage:
is-holiday.py --ics feed.ics [--tz America/Chicago] [--date 2026-07-03]
is-holiday.py --self-test
"""

from __future__ import annotations

import argparse
import sys
from datetime import date, datetime, timedelta

try:
from zoneinfo import ZoneInfo
except ImportError: # pragma: no cover - py<3.9
ZoneInfo = None


def unfold(text: str) -> list[str]:
    """Join RFC 5545 folded lines back into logical content lines.

    A physical line that begins with a space or a tab continues the previous
    logical line; that single leading character is dropped and the remainder
    is appended. A continuation-looking line with nothing before it is kept
    verbatim as its own line.
    """
    logical: list[str] = []
    for physical in text.splitlines():
        continues_previous = bool(logical) and physical.startswith((" ", "\t"))
        if continues_previous:
            logical[-1] = logical[-1] + physical[1:]
        else:
            logical.append(physical)
    return logical


def _parse_date(val: str) -> date | None:
val = val.strip()[:8]
try:
return datetime.strptime(val, "%Y%m%d").date()
except ValueError:
return None


def parse_events(text: str) -> list[dict]:
    """Return [{start, end, summary, categories}] for all-day VEVENTs.

    `start` and `end` are `date` objects with `end` exclusive; `summary` and
    `categories` are the stripped raw property values ("" when absent).

    Rules applied per VEVENT:
      * An event with no parsable DTSTART is dropped.
      * An event whose DTSTART or DTEND is a DATE-TIME value (contains the
        "T" date/time separator) is dropped. It is a timed event, not an
        all-day one, and truncating it to a date would misreport it.
      * A missing or unparsable DTEND, or a DTEND that is not later than
        DTSTART (some exporters write an inclusive/equal end), is treated as
        a single all-day event: end = start + 1 day.
    """
    events: list[dict] = []
    cur: dict | None = None
    timed = False  # set once the current VEVENT shows a DATE-TIME boundary
    for line in unfold(text):
        if line == "BEGIN:VEVENT":
            cur = {"start": None, "end": None, "summary": "", "categories": ""}
            timed = False
        elif line == "END:VEVENT":
            if cur and cur["start"] and not timed:
                # No usable exclusive end -> single all-day event.
                if cur["end"] is None or cur["end"] <= cur["start"]:
                    cur["end"] = cur["start"] + timedelta(days=1)
                events.append(cur)
            cur = None
        elif cur is not None:
            # "NAME;PARAM=...:VALUE" -> bare upper-cased property name + value.
            name, _, value = line.partition(":")
            key = name.split(";", 1)[0].upper()
            if key in ("DTSTART", "DTEND"):
                if "T" in value.upper():
                    timed = True
                elif key == "DTSTART":
                    cur["start"] = _parse_date(value)
                else:
                    cur["end"] = _parse_date(value)
            elif key == "SUMMARY":
                cur["summary"] = value.strip()
            elif key == "CATEGORIES":
                cur["categories"] = value.strip()
    return events


def holiday_on(text: str, day: date, category: str | None) -> dict | None:
    """Find the first event in the ICS `text` that covers `day`.

    When `category` is non-empty, only events whose CATEGORIES value contains
    it (case-insensitive substring) are considered; a falsy `category`
    disables the filter. An event covers `day` when start <= day < end.
    Returns the event dict, or None when nothing matches.
    """
    for event in parse_events(text):
        begins, ends = event["start"], event["end"]
        if not (begins and ends):
            continue
        filtered_out = bool(category) and category.lower() not in event["categories"].lower()
        if filtered_out:
            continue
        if begins <= day < ends:
            return event
    return None


def main() -> int:
    """Command-line entry point; returns the process exit status.

    Returns 0 when the target date is covered by a matching holiday event and
    1 when it is not. Failure to resolve the target date or to read the feed
    file also returns 1, so errors fail open ("not a holiday"). With
    --self-test the result of self_test() is returned instead. A missing
    --ics is reported through argparse's usage error.
    """
    # __doc__ is None under `python -OO`; fall back to an empty description.
    ap = argparse.ArgumentParser(description=(__doc__ or "").split("\n\n")[0])
    ap.add_argument("--ics", help="path to the ICS feed file")
    ap.add_argument("--tz", default="America/Chicago", help="timezone defining 'today'")
    ap.add_argument("--date", help="override target date (YYYY-MM-DD); default: today in --tz")
    ap.add_argument("--category", default="Company Holidays",
                    help="only match events whose CATEGORIES contains this (case-insensitive)")
    ap.add_argument("--any-category", action="store_true", help="match events of any category")
    ap.add_argument("--self-test", action="store_true")
    args = ap.parse_args()

    if args.self_test:
        return self_test()
    if not args.ics:
        ap.error("--ics is required (or --self-test)")

    # Resolve the target date. Any failure fails open (not a holiday -> run).
    try:
        if args.date:
            day = datetime.strptime(args.date, "%Y-%m-%d").date()
        elif ZoneInfo is not None:
            day = datetime.now(ZoneInfo(args.tz)).date()
        else:
            # zoneinfo unavailable: --tz cannot be honored, use the local clock.
            day = datetime.now().date()
    except Exception as e:  # noqa: BLE001 - fail open on any clock/tz error
        print(f"is-holiday: could not resolve date ({e}); treating as non-holiday", file=sys.stderr)
        return 1

    try:
        # Context manager so the feed file handle is closed deterministically.
        with open(args.ics, encoding="utf-8", errors="replace") as fh:
            text = fh.read()
    except OSError as e:
        print(f"is-holiday: could not read {args.ics} ({e}); treating as non-holiday", file=sys.stderr)
        return 1

    category = None if args.any_category else args.category
    match = holiday_on(text, day, category)
    if match:
        print(f"is-holiday: {day} is a company holiday: {match['summary'] or '(unnamed)'}")
        return 0
    print(f"is-holiday: {day} is not a company holiday")
    return 1


def self_test() -> int:
    """Run the built-in checks against an inline ICS fixture.

    Each check prints "ok: <name>" to stdout or "FAIL: <name>" to stderr.
    Returns 0 when every check passes and 1 otherwise.
    """
    fixture = (
        "BEGIN:VCALENDAR\r\n"
        # single-day company holiday
        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260703\r\nDTEND;VALUE=DATE:20260704\r\n"
        "CATEGORIES:Company Holidays\r\nSUMMARY:Company Holiday - Independence Day\r\nEND:VEVENT\r\n"
        # two-day span with an exclusive DTEND: the 28th and 29th, not the 30th
        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20261228\r\nDTEND;VALUE=DATE:20261230\r\n"
        "CATEGORIES:Company Holidays\r\nSUMMARY:Winter break\r\nEND:VEVENT\r\n"
        # individual time off: the category filter must ignore it
        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260626\r\nDTEND;VALUE=DATE:20260627\r\n"
        "CATEGORIES:Time Off\r\nSUMMARY:Someone - Vacation\r\nEND:VEVENT\r\n"
        # folded SUMMARY and no DTEND (single all-day event)
        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260907\r\n"
        "CATEGORIES:Company Holidays\r\nSUMMARY:Company Holiday - \r\n Labor Day\r\nEND:VEVENT\r\n"
        "END:VCALENDAR\r\n"
    )
    holidays_only = "Company Holidays"
    failed: list[str] = []

    def report(label, passed):
        stream = sys.stdout if passed else sys.stderr
        prefix = "ok: " if passed else "FAIL: "
        print(prefix + label, file=stream)
        if not passed:
            failed.append(label)

    # (check name, probe date, category filter, should a holiday be found?)
    match_cases = [
        ("single-day holiday matches", date(2026, 7, 3), holidays_only, True),
        ("day before holiday does not match", date(2026, 7, 2), holidays_only, False),
        ("DTEND is exclusive (day after does not match)", date(2026, 7, 4), holidays_only, False),
        ("multi-day span: first day matches", date(2026, 12, 28), holidays_only, True),
        ("multi-day span: middle day matches", date(2026, 12, 29), holidays_only, True),
        ("multi-day span: exclusive end excluded", date(2026, 12, 30), holidays_only, False),
        ("category filter ignores non-holiday PTO", date(2026, 6, 26), holidays_only, False),
        ("--any-category would catch the PTO", date(2026, 6, 26), None, True),
        ("missing DTEND -> single all-day match", date(2026, 9, 7), holidays_only, True),
        ("missing DTEND -> next day excluded", date(2026, 9, 8), holidays_only, False),
    ]
    for label, probe, category, expect_hit in match_cases:
        found = holiday_on(fixture, probe, category) is not None
        report(label, found == expect_hit)

    # Summary-text checks on the matched events themselves.
    labor_day = holiday_on(fixture, date(2026, 9, 7), holidays_only) or {}
    report("folded SUMMARY is unfolded", labor_day.get("summary") == "Company Holiday - Labor Day")
    independence_day = holiday_on(fixture, date(2026, 7, 3), holidays_only) or {}
    report("matched holiday carries its summary", "Independence Day" in independence_day.get("summary", ""))

    if not failed:
        print("\nall is-holiday self-tests passed")
        return 0
    print(f"\n{len(failed)} failure(s)", file=sys.stderr)
    return 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Loading