diff --git a/.github/workflows/review-existing-content.yml b/.github/workflows/review-existing-content.yml
index 30285619ed47..05ae3aa202f2 100644
--- a/.github/workflows/review-existing-content.yml
+++ b/.github/workflows/review-existing-content.yml
@@ -6,24 +6,29 @@ name: "Scheduled jobs: Review existing content (dispatcher)"
 # picks the day's articles by weighted fair queuing — score = importance
 # (strategic tier x traffic) x staleness (days since the page's effective last
 # review) — and this workflow then fans out one
-# content-review-article.yml run per selected article. Each of those worker
-# runs reviews exactly one article, opens one ready PR, and force-dispatches
-# claude-code-review.yml over it. Humans merge; nothing here is auto-merged.
+# content-review-article.yml run per selected article. Each worker reviews
+# exactly one article and, if it finds a fix, opens a draft PR; the re-lint
+# gate promotes it to ready (firing the normal triage -> review chain) and
+# arms GitHub auto-merge. master requires an approving review + the build
+# check, so a human approval is still required to merge — auto-merge only
+# removes the manual merge click after approval.
 #
-# The whole job is inert until the CONTENT_REVIEW_ENABLED repo variable is
-# 'true' — scheduled workflows only run from the default branch, so this
-# can merge ahead of being switched on. workflow_dispatch always runs
-# (that's the testing path; see the dry_run / count / paths inputs).
-#
-# CRON IS DISABLED for now: the dispatcher is MANUAL-ONLY during rollout.
-# Re-enable the schedule below when the per-article worker has proven out.
+# SINGLE SWITCH (default-on) — the CONTENT_REVIEW_COUNT repo variable is both
+# the on/off and the cadence knob:
+#   unset -> on, 3 articles/run (the default)
+#   '0'   -> off: the review job is skipped (no article reviews, no API spend)
+#   'N'   -> on, N articles/run
+# Set it from Settings -> Variables to retune or pause with no PR. The job gate
+# below string-compares it against '0' (the hard off) via format(), so an unset
+# variable stays on; the selector no-ops on any count<=0, so a stray '00'/'-1'
+# still does no work. workflow_dispatch always runs regardless (the testing
+# path; see the dry_run / count / paths inputs).
 
 on:
-  # Re-enable when ready (see note above):
-  # schedule:
-  #   # Weekdays at 2:00PM UTC (one hour before check-links, so the two
-  #   # bot-PR producers don't contend).
-  #   - cron: '0 14 * * 1-5'
+  schedule:
+    # Weekdays at 2:00PM UTC (one hour before check-links, so the two
+    # bot-PR producers don't contend).
+    - cron: '0 14 * * 1-5'
   workflow_dispatch:
     inputs:
       count:
@@ -48,8 +53,42 @@ permissions:
   actions: write # Required to dispatch content-review-article.yml per article
 
 jobs:
+  # Optional: skip the scheduled run on company holidays. Reads a BambooHR
+  # "Company Holidays" ICS feed from the BAMBOOHR_HOLIDAY_ICS_URL repo VARIABLE
+  # (a per-user feed token, treated as a temporary convenience — not a permanent
+  # shared resource). FAILS OPEN at every step: variable unset, feed 404/fetch
+  # failure, or parse error all yield is_holiday=false, and `review` gates on
+  # !cancelled() so even a failed run of this job cannot block it. The URL is
+  # never echoed, so the token stays out of the logs. (Manual workflow_dispatch
+  # ignores this; see the `review` gate.) Always runs so `needs` is satisfiable.
+  holiday-check:
+    name: Company-holiday check
+    runs-on: ubuntu-latest
+    outputs:
+      is_holiday: ${{ steps.check.outputs.is_holiday }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: check
+        env:
+          ICS_URL: ${{ vars.BAMBOOHR_HOLIDAY_ICS_URL }}
+        run: |
+          if [ -z "$ICS_URL" ]; then
+            echo "BAMBOOHR_HOLIDAY_ICS_URL not set; not skipping for holidays"
+            echo "is_holiday=false" >> "$GITHUB_OUTPUT"; exit 0
+          fi
+          if ! curl -fsSL --max-time 20 "$ICS_URL" -o holidays.ics; then
+            echo "::warning::holiday feed unavailable (404/fetch failure); failing open (dispatcher will run)"
+            echo "is_holiday=false" >> "$GITHUB_OUTPUT"; exit 0
+          fi
+          if python3 scripts/content-review/is-holiday.py --ics holidays.ics --tz America/Chicago; then
+            echo "is_holiday=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "is_holiday=false" >> "$GITHUB_OUTPUT"
+          fi
+
   review:
     name: Review existing content
+    needs: holiday-check
     runs-on: ubuntu-latest
     # PULUMI_STACK_NAME is an environment-scoped variable, so the job must
     # select the environment to resolve it — otherwise `Resolve ledger bucket`
@@ -58,7 +97,9 @@ jobs:
     # Hard cost ceiling: selection + up to `count` article reviews fit well
     # inside this; a hung run dies rather than burning API budget.
     timeout-minutes: 60
-    if: vars.CONTENT_REVIEW_ENABLED == 'true' || github.event_name == 'workflow_dispatch'
+    if: >-
+      !cancelled() && (github.event_name == 'workflow_dispatch' ||
+      (format('{0}', vars.CONTENT_REVIEW_COUNT) != '0' && needs.holiday-check.outputs.is_holiday != 'true'))
     steps:
       # ESC runs before checkout so the bot token can authenticate the
       # checkout — the PRs opened by claude-code-action later then go out
@@ -185,7 +226,11 @@ jobs:
         env:
           GH_TOKEN: ${{ steps.esc-secrets.outputs.PULUMI_BOT_TOKEN }}
+          # Count precedence: a manual workflow_dispatch input wins; otherwise the
+          # scheduled run uses the CONTENT_REVIEW_COUNT repo variable; otherwise 3.
+          # Passed through env rather than inlined so it is never parsed as shell.
+          REVIEW_COUNT: ${{ github.event.inputs.count || vars.CONTENT_REVIEW_COUNT || '3' }}
         run: |
-          ARGS=(--count "${{ github.event.inputs.count || '3' }}" --out .content-review-queue.json)
+          ARGS=(--count "$REVIEW_COUNT" --out .content-review-queue.json)
           if [ -d .ledger-cache ]; then
             ARGS+=(--ledger-dir .ledger-cache)
           fi
diff --git a/scripts/content-review/is-holiday.py b/scripts/content-review/is-holiday.py
new file mode 100644
index 000000000000..ffc2bd7f47e9
--- /dev/null
+++ b/scripts/content-review/is-holiday.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""Decide whether a date is a company holiday, per an iCalendar (ICS) feed.
+
+The content-review dispatcher uses this to skip its scheduled run on company
+holidays. It reads an ICS feed (the Pulumi BambooHR "Company Holidays" feed) and
+exits 0 when the target date — today in --tz by default — falls on a holiday,
+and 1 otherwise. The dispatcher runs `if this exits 1`, so:
+
+  exit 0 -> today IS a holiday     -> dispatcher skips
+  exit 1 -> not a holiday/error    -> dispatcher runs
+
+Errors FAIL OPEN (treated as "not a holiday", exit 1) so a calendar fetch or
+parse hiccup never silently halts the pipeline — at worst it runs on a holiday
+and opens PRs nobody reads until the next workday.
+
+Only all-day VEVENTs whose CATEGORIES contains --category (default
+"Company Holidays", case-insensitive) count. So even if the feed is ever
+repointed at a broader calendar that also carries individual time off, this only
+ever skips on real company holidays, never on someone's PTO. Pass --any-category
+to match every event regardless.
+
+All-day events use `VALUE=DATE` with an EXCLUSIVE DTEND (the morning after), so a
+single-day holiday is DTSTART=D, DTEND=D+1, and the match is DTSTART <= day < DTEND.
+
+Usage:
+  is-holiday.py --ics feed.ics [--tz America/Chicago] [--date 2026-07-03]
+  is-holiday.py --self-test
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from datetime import date, datetime, timedelta
+
+try:
+    from zoneinfo import ZoneInfo
+except ImportError:  # pragma: no cover - py<3.9
+    ZoneInfo = None
+
+
+def unfold(text: str) -> list[str]:
+    """Undo RFC 5545 line folding (continuation lines start with space/tab)."""
+    out: list[str] = []
+    for raw in text.splitlines():
+        if raw[:1] in (" ", "\t") and out:
+            out[-1] += raw[1:]  # drop only the single leading fold character
+        else:
+            out.append(raw)
+    return out
+
+
+def _parse_date(val: str) -> date | None:
+    """Parse the leading YYYYMMDD of a DTSTART/DTEND value; None if malformed."""
+    try:
+        return datetime.strptime(val.strip()[:8], "%Y%m%d").date()
+    except ValueError:
+        return None
+
+
+def parse_events(text: str) -> list[dict]:
+    """Return [{start, end, summary, categories}] per VEVENT with a parseable DTSTART."""
+    events: list[dict] = []
+    cur: dict | None = None  # the VEVENT being read, or None when outside one
+    for line in unfold(text):
+        if line == "BEGIN:VEVENT":
+            cur = {"start": None, "end": None, "summary": "", "categories": ""}
+        elif line == "END:VEVENT":
+            if cur and cur["start"]:  # events without a usable DTSTART are dropped
+                if cur["end"] is None:  # no DTEND -> single all-day event
+                    cur["end"] = cur["start"] + timedelta(days=1)
+                events.append(cur)
+            cur = None
+        elif cur is not None:
+            name, _, value = line.partition(":")
+            key = name.split(";", 1)[0].upper()  # strip params such as ;VALUE=DATE
+            if key == "DTSTART":
+                cur["start"] = _parse_date(value)
+            elif key == "DTEND":
+                cur["end"] = _parse_date(value)
+            elif key == "SUMMARY":
+                cur["summary"] = value.strip()
+            elif key == "CATEGORIES":  # may repeat within one VEVENT, so accumulate
+                cur["categories"] = ",".join(filter(None, (cur["categories"], value.strip())))
+    return events
+
+
+def holiday_on(text: str, day: date, category: str | None) -> dict | None:
+    """The first matching holiday event covering `day`, or None."""
+    for ev in parse_events(text):
+        if not ev["start"] or not ev["end"]:
+            continue
+        if category and category.lower() not in ev["categories"].lower():
+            continue
+        if ev["start"] <= day < ev["end"]:  # DTEND is exclusive
+            return ev
+    return None
+
+
+def main() -> int:
+    """CLI entry point: return 0 if the target date is a holiday, 1 otherwise or on error."""
+    ap = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
+    ap.add_argument("--ics", help="path to the ICS feed file")
+    ap.add_argument("--tz", default="America/Chicago", help="timezone defining 'today'")
+    ap.add_argument("--date", help="override target date (YYYY-MM-DD); default: today in --tz")
+    ap.add_argument("--category", default="Company Holidays",
+                    help="only match events whose CATEGORIES contains this (case-insensitive)")
+    ap.add_argument("--any-category", action="store_true", help="match events of any category")
+    ap.add_argument("--self-test", action="store_true")
+    args = ap.parse_args()
+
+    if args.self_test:
+        return self_test()
+    if not args.ics:
+        ap.error("--ics is required (or --self-test)")
+
+    # Resolve the target date. Any failure fails open (not a holiday -> run).
+    try:
+        if args.date:
+            day = datetime.strptime(args.date, "%Y-%m-%d").date()
+        else:  # without zoneinfo (py<3.9) tz is None -> the machine's local date
+            day = datetime.now(ZoneInfo(args.tz) if ZoneInfo is not None else None).date()
+    except Exception as e:  # noqa: BLE001 - fail open on any clock/tz error
+        print(f"is-holiday: could not resolve date ({e}); treating as non-holiday", file=sys.stderr)
+        return 1
+
+    try:
+        with open(args.ics, encoding="utf-8", errors="replace") as fh:
+            text = fh.read()
+    except OSError as e:
+        print(f"is-holiday: could not read {args.ics} ({e}); treating as non-holiday", file=sys.stderr)
+        return 1
+
+    category = None if args.any_category else args.category
+    match = holiday_on(text, day, category)
+    if match:
+        print(f"is-holiday: {day} is a company holiday: {match['summary'] or '(unnamed)'}")
+        return 0
+    print(f"is-holiday: {day} is not a company holiday")
+    return 1
+
+
+def self_test() -> int:
+    """Run the built-in checks against an inline ICS fixture; return 0 iff all pass."""
+    failures = []
+
+    def check(name, cond):
+        print(("ok:   " if cond else "FAIL: ") + name, file=sys.stdout if cond else sys.stderr)
+        if not cond:
+            failures.append(name)
+
+    ics = (
+        "BEGIN:VCALENDAR\r\n"
+        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260703\r\nDTEND;VALUE=DATE:20260704\r\n"
+        "CATEGORIES:Company Holidays\r\nSUMMARY:Company Holiday - Independence Day\r\nEND:VEVENT\r\n"
+        # a multi-day span (DTEND exclusive): covers 28th and 29th, not the 30th
+        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20261228\r\nDTEND;VALUE=DATE:20261230\r\n"
+        "CATEGORIES:Company Holidays\r\nSUMMARY:Winter break\r\nEND:VEVENT\r\n"
+        # a PTO event the same day as a workday — must be ignored by the category filter
+        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260626\r\nDTEND;VALUE=DATE:20260627\r\n"
+        "CATEGORIES:Time Off\r\nSUMMARY:Someone - Vacation\r\nEND:VEVENT\r\n"
+        # a folded SUMMARY line + no DTEND (single all-day)
+        "BEGIN:VEVENT\r\nDTSTART;VALUE=DATE:20260907\r\n"
+        "CATEGORIES:Company Holidays\r\nSUMMARY:Company Holiday - \r\n Labor Day\r\nEND:VEVENT\r\n"
+        "END:VCALENDAR\r\n"
+    )
+    cat = "Company Holidays"
+    check("single-day holiday matches", holiday_on(ics, date(2026, 7, 3), cat) is not None)
+    check("day before holiday does not match", holiday_on(ics, date(2026, 7, 2), cat) is None)
+    check("DTEND is exclusive (day after does not match)", holiday_on(ics, date(2026, 7, 4), cat) is None)
+    check("multi-day span: first day matches", holiday_on(ics, date(2026, 12, 28), cat) is not None)
+    check("multi-day span: middle day matches", holiday_on(ics, date(2026, 12, 29), cat) is not None)
+    check("multi-day span: exclusive end excluded", holiday_on(ics, date(2026, 12, 30), cat) is None)
+    check("category filter ignores non-holiday PTO", holiday_on(ics, date(2026, 6, 26), cat) is None)
+    check("--any-category would catch the PTO", holiday_on(ics, date(2026, 6, 26), None) is not None)
+    check("missing DTEND -> single all-day match", holiday_on(ics, date(2026, 9, 7), cat) is not None)
+    check("missing DTEND -> next day excluded", holiday_on(ics, date(2026, 9, 8), cat) is None)
+    check("folded SUMMARY is unfolded", (holiday_on(ics, date(2026, 9, 7), cat) or {}).get("summary") == "Company Holiday - Labor Day")
+    check("matched holiday carries its summary", "Independence Day" in (holiday_on(ics, date(2026, 7, 3), cat) or {}).get("summary", ""))
+
+    if failures:
+        print(f"\n{len(failures)} failure(s)", file=sys.stderr)
+        return 1
+    print("\nall is-holiday self-tests passed")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())