From 89256960ce5cc65c28c7ffd7c5423c0039383e47 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 08:12:57 -0500 Subject: [PATCH 1/6] feat(tools): add reference integrity, size validation, and skill scaffolding Port best practices from personal-plugins repo: - validate-references.py: BFS link crawler that walks from each SKILL.md through all referenced markdown files. Catches broken links (exit 1) and orphaned reference files (warning). Covers 59 reference files including cross-skill links. - validate-size.py: SKILL.md size checker with extraction-candidate detection. Identifies oversized code blocks (>30 lines) that should be moved to references/. Complements the existing SKILL001 markdownlint rule with detailed diagnostics. - init-skill.py: Skill scaffolding tool for contributors. Creates correctly structured SKILL.md with frontmatter template + references/. - markdownlint-frontmatter.cjs: Extended SKILL002 rule with frontmatter property whitelist. Rejects non-spec properties that Claude Code would silently ignore. - skill-frontmatter.schema.json: Tightened additionalProperties to false, added license and metadata as defined extension properties. - mise.toml: Added validate:refs, validate:size, validate (composite), and init:skill tasks. Wired validate into the build pipeline. 
Co-Authored-By: Claude Opus 4.6 --- mise.toml | 30 +++- schemas/skill-frontmatter.schema.json | 10 +- tools/init-skill.py | 125 +++++++++++++++ tools/markdownlint-frontmatter.cjs | 30 ++++ tools/validate-references.py | 219 ++++++++++++++++++++++++++ tools/validate-size.py | 168 ++++++++++++++++++++ 6 files changed, 580 insertions(+), 2 deletions(-) create mode 100644 tools/init-skill.py create mode 100644 tools/validate-references.py create mode 100644 tools/validate-size.py diff --git a/mise.toml b/mise.toml index 9d994ab6..8a5ba37a 100644 --- a/mise.toml +++ b/mise.toml @@ -65,6 +65,33 @@ run = [ { task = "lint:cross-refs"} ] +# ============= +# VALIDATION +# ============= + +[tasks."validate:refs"] +description = "Detect broken links and orphaned reference files" +run = "uv run tools/validate-references.py" + +[tasks."validate:size"] +description = "Check SKILL.md sizes and flag extraction candidates" +run = "uv run tools/validate-size.py" + +[tasks.validate] +description = "Run all validation checks" +run = [ + { task = "validate:refs" }, + { task = "validate:size" } +] + +# ============= +# SCAFFOLDING +# ============= + +[tasks."init:skill"] +description = "Scaffold a new skill directory" +run = "uv run tools/init-skill.py" + # ========= # SECURITY # ========= @@ -114,9 +141,10 @@ run = [ # =============== [tasks.build] -description = "Complete build: lint, format, security scans" +description = "Complete build: lint, format, validate, security scans" run = [ { task = "lint" }, { task = "fmt:check" }, + { task = "validate" }, { task = "security"} ] diff --git a/schemas/skill-frontmatter.schema.json b/schemas/skill-frontmatter.schema.json index b1c56cee..bb9b3b99 100644 --- a/schemas/skill-frontmatter.schema.json +++ b/schemas/skill-frontmatter.schema.json @@ -1,7 +1,7 @@ { "$id": "https://awslabs.github.io/agent-plugins/schemas/skill-frontmatter.schema.json", "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": true, + 
"additionalProperties": false, "description": "Schema for YAML frontmatter in SKILL.md files", "properties": { "agent": { @@ -33,6 +33,14 @@ "description": "Prevent Claude from auto-loading", "type": "boolean" }, + "license": { + "description": "SPDX license identifier", + "type": "string" + }, + "metadata": { + "description": "Plugin-specific metadata (author, version, etc.)", + "type": "object" + }, "model": { "description": "Model to use when skill is active", "type": "string" diff --git a/tools/init-skill.py b/tools/init-skill.py new file mode 100644 index 00000000..cccda2ee --- /dev/null +++ b/tools/init-skill.py @@ -0,0 +1,125 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [] +# /// +"""Scaffold a new skill directory with correct SKILL.md template.""" + +import re +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +PLUGINS_DIR = ROOT / "plugins" +NAME_RE = re.compile(r"^[a-z][a-z0-9-]*$") + +SKILL_TEMPLATE = """\ +--- +name: {name} +description: > + {description} +--- + +# {title} + +## When to Use + +[FILL: Describe the specific scenarios and contexts where this skill applies.] + +## Core Concepts + +[FILL: Key concepts, terminology, and mental models.] + +## Quick Reference + +[FILL: The most commonly needed patterns, commands, or code snippets.] + +## Anti-Patterns + +[FILL: Common mistakes and what to do instead.] + +## References + +- `references/` — [FILL: Describe what detailed reference material is available.] +""" + + +def main() -> int: + if len(sys.argv) < 4: + print(f"Usage: uv run {sys.argv[0]} <plugin> <skill-name> <description>") + print( + f"Example: uv run {sys.argv[0]} deploy-on-aws my-skill" + " 'Brief description. 
Use when the user asks to...'" + ) + return 1 + + plugin_name = sys.argv[1] + skill_name = sys.argv[2] + description = sys.argv[3] + + # Validate plugin exists + plugin_dir = PLUGINS_DIR / plugin_name + if not plugin_dir.is_dir(): + print(f"Error: plugin '{plugin_name}' not found at {plugin_dir}") + available = sorted( + d.name for d in PLUGINS_DIR.iterdir() if d.is_dir() and not d.name.startswith(".") + ) + if available: + print(f"Available plugins: {', '.join(available)}") + return 1 + + # Validate skill name format + if not NAME_RE.match(skill_name): + print(f"Error: '{skill_name}' is not a valid skill name") + print("Must be kebab-case (lowercase letters, digits, hyphens)") + return 1 + + if len(skill_name) > 64: + print(f"Error: skill name exceeds 64 characters (current: {len(skill_name)})") + return 1 + + if "--" in skill_name: + print(f"Error: '{skill_name}' contains consecutive hyphens") + return 1 + + # Check reserved words + if re.search(r"\b(anthropic|claude)\b", skill_name, re.IGNORECASE): + print(f"Error: '{skill_name}' contains reserved word") + return 1 + + # Check skill doesn't already exist + skills_dir = plugin_dir / "skills" + skill_dir = skills_dir / skill_name + if skill_dir.exists(): + print(f"Error: skill '{skill_name}' already exists at {skill_dir}") + return 1 + + # Warn about trigger phrase + if "Use when" not in description and "Use this" not in description: + print("Warning: description should contain 'Use when' or 'Use this' trigger phrase") + + # Create structure + title = skill_name.replace("-", " ").title() + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text( + SKILL_TEMPLATE.format(name=skill_name, description=description, title=title), + encoding="utf-8", + ) + refs_dir = skill_dir / "references" + refs_dir.mkdir() + (refs_dir / ".gitkeep").touch() + + rel_path = skill_dir.relative_to(ROOT) + print(f"Created skill '{skill_name}' in plugin '{plugin_name}':") + print(f" {rel_path}/SKILL.md") + print(f" 
{rel_path}/references/.gitkeep") + print() + print("Next steps:") + print(f" 1. Edit {rel_path}/SKILL.md — fill in the [FILL] sections") + print(f" 2. Add reference files to {rel_path}/references/") + print(f" 3. Run: mise run validate") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/markdownlint-frontmatter.cjs b/tools/markdownlint-frontmatter.cjs index 3f911600..f8198d51 100644 --- a/tools/markdownlint-frontmatter.cjs +++ b/tools/markdownlint-frontmatter.cjs @@ -5,12 +5,30 @@ * Required: * - name: skill identifier (kebab-case) * - description: when to use this skill (min 20 chars) + * + * Also enforces a property whitelist per skill-frontmatter.schema.json. */ "use strict"; const path = require("path"); +// Allowed frontmatter properties per the skill-frontmatter schema +// Core spec properties + common extensions (license, metadata) +const ALLOWED_PROPERTIES = new Set([ + "name", + "description", + "context", + "agent", + "model", + "allowed-tools", + "argument-hint", + "user-invocable", + "disable-model-invocation", + "license", + "metadata", +]); + module.exports = { names: ["skill-frontmatter", "SKILL002"], description: "SKILL.md files must have required frontmatter fields", @@ -146,5 +164,17 @@ module.exports = { }); } } + + // Check for non-spec frontmatter properties + const topLevelKeys = frontmatter.match(/^[a-z][\w-]*(?=:)/gm) || []; + for (const key of topLevelKeys) { + if (!ALLOWED_PROPERTIES.has(key)) { + onError({ + lineNumber: 2, + detail: `Non-spec frontmatter property: "${key}" (allowed: ${[...ALLOWED_PROPERTIES].sort().join(", ")})`, + context: "Unknown property", + }); + } + } }, }; diff --git a/tools/validate-references.py b/tools/validate-references.py new file mode 100644 index 00000000..a3f42eed --- /dev/null +++ b/tools/validate-references.py @@ -0,0 +1,219 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [] +# /// +"""Detect broken links and orphaned reference files across all plugins. 
+ +Walks from each SKILL.md through all referenced markdown files using BFS, +reports broken links (exit 1) and orphaned reference files (warning). +""" + +import re +import sys +from collections import deque +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +PLUGINS_DIR = ROOT / "plugins" + +# File extensions to track as linkable resources +EXT = r"md|py|ts|js|json|yaml|yml|toml|txt|tsx|jsx|sh|cjs|mjs" +# Patterns for extracting file references from markdown +INLINE_CODE_RE = re.compile(rf"`([^`]+\.(?:{EXT}))`") +MD_LINK_RE = re.compile(rf"\[(?:[^\]]*)\]\(([^)]+\.(?:{EXT}))\)") +# Plain-text paths containing references/ (catches table cells, prose) +# Negative lookbehind prevents matching partial paths inside markdown link hrefs +# e.g. won't match "references/foo.md" from "../../other-skill/references/foo.md" +PLAIN_REF_RE = re.compile(rf"(?<![\w/.-])(references/[^\s`)<>|]+\.(?:{EXT}))") + +RED = "\033[91m" +YELLOW = "\033[93m" +GREEN = "\033[92m" +BOLD = "\033[1m" +RESET = "\033[0m" + + +def extract_refs(text: str) -> set[str]: + """Extract all file path references from markdown text.""" + refs: set[str] = set() + for pattern in (INLINE_CODE_RE, MD_LINK_RE): + refs.update(pattern.findall(text)) + refs.update(PLAIN_REF_RE.findall(text)) + return refs + + +def find_skill_root(source_file: Path) -> Path | None: + """Find the skill root directory (plugins/X/skills/Y/) for a source file.""" + resolved = source_file.resolve() + for parent in resolved.parents: + if parent.parent.name == "skills" and parent.parent.parent.exists(): + # Verify this is under plugins/ + try: + parent.relative_to(PLUGINS_DIR) + return parent + except ValueError: + pass + return None + + +def resolve_ref(ref: str, source_file: Path) -> list[Path]: + """Resolve a reference path to candidate file paths.""" + # Skip URLs, mailto, fragment-only links + if ref.startswith(("http://", "https://", "mailto:", "#")): + return [] + + candidates = [] + # Try relative to the source file's directory + 
candidates.append((source_file.parent / ref).resolve()) + # Also try relative to the skill root + skill_root = find_skill_root(source_file) + if skill_root: + from_root = (skill_root / ref).resolve() + if from_root not in candidates: + candidates.append(from_root) + # Try relative to the skill's references/ directory + # Handles convention where files in references/ use short paths like + # `design-refs/foo.md` meaning `references/design-refs/foo.md` + from_refs = (skill_root / "references" / ref).resolve() + if from_refs not in candidates: + candidates.append(from_refs) + return candidates + + +def collect_all_resource_files() -> set[Path]: + """Collect all files under plugins/*/skills/*/references/.""" + files: set[Path] = set() + for plugin_dir in PLUGINS_DIR.iterdir(): + if not plugin_dir.is_dir(): + continue + skills_dir = plugin_dir / "skills" + if not skills_dir.is_dir(): + continue + for skill_dir in skills_dir.iterdir(): + if not skill_dir.is_dir(): + continue + refs_dir = skill_dir / "references" + if refs_dir.is_dir(): + for f in refs_dir.rglob("*"): + if f.is_file() and f.name != ".gitkeep": + files.add(f.resolve()) + return files + + +def collect_entry_points() -> list[Path]: + """Collect all SKILL.md files as entry points.""" + entries: list[Path] = [] + for plugin_dir in PLUGINS_DIR.iterdir(): + if not plugin_dir.is_dir(): + continue + skills_dir = plugin_dir / "skills" + if not skills_dir.is_dir(): + continue + for skill_dir in skills_dir.iterdir(): + if skill_dir.is_dir(): + skill_md = skill_dir / "SKILL.md" + if skill_md.exists(): + entries.append(skill_md) + return entries + + +def main() -> int: + if not PLUGINS_DIR.is_dir(): + print(f"{YELLOW}No plugins/ directory found{RESET}") + return 0 + + all_resources = collect_all_resource_files() + entry_points = collect_entry_points() + + if not entry_points: + print(f"{YELLOW}No SKILL.md entry points found{RESET}") + return 0 + + # BFS: track which resource files are reachable + reachable: set[Path] = 
set() + broken_links: list[tuple[Path, str]] = [] + + queue: deque[Path] = deque(entry_points) + visited: set[Path] = set() + + while queue: + current = queue.popleft() + resolved = current.resolve() + if resolved in visited: + continue + visited.add(resolved) + + if not current.exists(): + continue + + text = current.read_text(encoding="utf-8") + refs = extract_refs(text) + + for ref in refs: + if ref.startswith(("http://", "https://", "mailto:", "#")): + continue + # Skip glob patterns + if "*" in ref or "?" in ref: + continue + + candidates = resolve_ref(ref, current) + if not candidates: + continue + + found_any = any(p.exists() for p in candidates) + + if not found_any: + # Only report as broken if the path looks like a reference + ref_lower = ref.lower() + if any( + kw in ref_lower + for kw in ("references/", "templates/", "skills/", "phases/") + ): + broken_links.append((current, ref)) + continue + + for candidate in candidates: + if candidate.exists(): + reachable.add(candidate) + # If it's a markdown file, crawl it too for transitive links + if candidate not in visited and candidate.suffix == ".md": + queue.append(candidate) + + orphans = all_resources - reachable + + print(f"{BOLD}Reference Integrity Check{RESET}") + print( + f"Entry points: {len(entry_points)}, Resource files: {len(all_resources)}\n" + ) + + if broken_links: + print(f"{RED}Broken links ({len(broken_links)}):{RESET}") + for source, ref in sorted(broken_links, key=lambda x: (str(x[0]), x[1])): + rel_source = source.relative_to(ROOT) + print(f" {RED}BROKEN{RESET} {rel_source} -> {ref}") + print() + + if orphans: + print(f"{YELLOW}Orphaned files ({len(orphans)}):{RESET}") + for p in sorted(orphans): + rel = p.relative_to(ROOT) + print(f" {YELLOW}ORPHAN{RESET} {rel}") + print() + + if not broken_links and not orphans: + print( + f" {GREEN}All {len(all_resources)} resource files are reachable." 
+ f" No broken links found.{RESET}\n" + ) + + reachable_count = len(all_resources) - len(orphans) + print( + f"{BOLD}Summary:{RESET} {reachable_count}/{len(all_resources)} reachable," + f" {len(orphans)} orphaned, {len(broken_links)} broken links" + ) + + return 1 if broken_links else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/validate-size.py b/tools/validate-size.py new file mode 100644 index 00000000..20a8a987 --- /dev/null +++ b/tools/validate-size.py @@ -0,0 +1,168 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [] +# /// +"""Check SKILL.md line counts and flag extraction candidates. + +Complements the SKILL001 markdownlint rule (which enforces hard limits) +by providing detailed diagnostics: per-skill size table, approximate +token counts, and specific code blocks that should be moved to references/. +""" + +import re +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +PLUGINS_DIR = ROOT / "plugins" + +IDEAL_MAX = 200 +GOOD_MAX = 300 +WARNING_MAX = 500 +CODE_BLOCK_THRESHOLD = 30 # lines — suggest extraction if a code block exceeds this + +RED = "\033[91m" +YELLOW = "\033[93m" +GREEN = "\033[92m" +DIM = "\033[2m" +BOLD = "\033[1m" +RESET = "\033[0m" + +FENCE_RE = re.compile(r"^(`{3,})") + + +def find_extraction_candidates(text: str) -> list[str]: + """Find code blocks over CODE_BLOCK_THRESHOLD lines as extraction candidates.""" + candidates: list[str] = [] + in_block = False + block_start = 0 + block_lang = "" + block_lines = 0 + fence_len = 0 + + for i, line in enumerate(text.splitlines(), 1): + stripped = line.strip() + m = FENCE_RE.match(stripped) + if m and not in_block: + in_block = True + fence_len = len(m.group(1)) + block_start = i + block_lang = stripped[fence_len:].strip() or "unknown" + block_lines = 0 + elif in_block and m and len(m.group(1)) >= fence_len and stripped == m.group(1): + in_block = False + if block_lines > CODE_BLOCK_THRESHOLD: + candidates.append( + 
f"Code block ({block_lang}) at line {block_start}: {block_lines} lines" + ) + elif in_block: + block_lines += 1 + + return candidates + + +def collect_skills() -> list[tuple[str, str, Path]]: + """Collect all (plugin_name, skill_name, skill_md_path) tuples.""" + skills = [] + for plugin_dir in sorted(PLUGINS_DIR.iterdir()): + if not plugin_dir.is_dir(): + continue + skills_dir = plugin_dir / "skills" + if not skills_dir.is_dir(): + continue + for skill_dir in sorted(skills_dir.iterdir()): + if not skill_dir.is_dir(): + continue + skill_md = skill_dir / "SKILL.md" + if skill_md.exists(): + skills.append((plugin_dir.name, skill_dir.name, skill_md)) + return skills + + +def main() -> int: + skills = collect_skills() + + if not skills: + print(f"{YELLOW}No skills found{RESET}") + return 0 + + has_errors = False + results: list[tuple[str, str, int, int, str]] = [] + + for plugin_name, skill_name, skill_md in skills: + text = skill_md.read_text(encoding="utf-8") + lines = len(text.splitlines()) + tokens = len(text) // 4 # rough estimate + + if lines > WARNING_MAX: + level = "error" + has_errors = True + elif lines > GOOD_MAX: + level = "warning" + elif lines > IDEAL_MAX: + level = "good" + else: + level = "ideal" + + label = f"{plugin_name}/{skill_name}" + results.append((label, skill_name, lines, tokens, level)) + + # Sort by line count descending + results.sort(key=lambda r: r[2], reverse=True) + + print(f"{BOLD}SKILL.md Size Check{RESET}") + print(f"Checking {len(results)} skills...\n") + + print(f" {'Plugin/Skill':<50} {'Lines':>6} {'~Tokens':>8} Status") + print(f" {'─' * 50} {'─' * 6} {'─' * 8} {'─' * 12}") + + for label, _, lines, tokens, level in results: + if level == "error": + status = f"{RED}EXTRACT{RESET}" + color = RED + elif level == "warning": + status = f"{YELLOW}REVIEW{RESET}" + color = YELLOW + elif level == "good": + status = f"{DIM}good{RESET}" + color = "" + else: + status = f"{GREEN}ideal{RESET}" + color = "" + + line_str = 
f"{color}{lines:>6}{RESET}" if color else f"{lines:>6}" + print(f" {label:<50} {line_str} {tokens:>8} {status}") + + # Show extraction candidates for oversized skills + oversized = [ + (label, skill_name) + for label, skill_name, lines, _, level in results + if level in ("error", "warning") + ] + if oversized: + print(f"\n{BOLD}Extraction candidates:{RESET}") + for label, skill_name in oversized: + # Find the matching skill path + for plugin_name, sn, skill_md in skills: + if sn == skill_name: + text = skill_md.read_text(encoding="utf-8") + candidates = find_extraction_candidates(text) + if candidates: + print(f"\n {YELLOW}{label}{RESET}:") + for c in candidates: + print(f" → {c}") + break + + error_count = sum(1 for _, _, _, _, level in results if level == "error") + warn_count = sum(1 for _, _, _, _, level in results if level == "warning") + print( + f"\n{BOLD}Summary:{RESET} {len(results)} skills," + f" {error_count} over {WARNING_MAX} (error)," + f" {warn_count} over {GOOD_MAX} (warning)" + ) + + return 1 if has_errors else 0 + + +if __name__ == "__main__": + sys.exit(main()) From 66b778959e7939f1e9c171f1612a699545e83ba8 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 08:40:20 -0500 Subject: [PATCH 2/6] feat(tools): add URL liveness checker and size validator with extraction candidates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New tools: - validate-urls.py: Async HTTPS URL liveness checker using httpx. HEAD with GET fallback, concurrency-limited, GitHub token aware. Reports broken links and permanent redirects. Not in build pipeline (network- dependent) — run via `mise run validate:urls`. - validate-size.py: SKILL.md size checker that identifies code blocks over 30 lines as extraction candidates for references/. - .url-check-ignore: Skip patterns for placeholder URLs, CloudFormation template variables, and API endpoints. 
URL fixes found by the checker: - Fix 404: amazon-location-samples repo → aws-geospatial org page - Fix 301: help.github.com → docs.github.com (CONTRIBUTING.md) - Fix 301: docs.github.com actions workflow path (TROUBLESHOOTING.md) - Fix 301: docs.powertools.aws.dev → docs.aws.amazon.com/powertools - Fix 301: aws-otel.github.io trailing slash (observability.md) Co-Authored-By: Claude Opus 4.6 --- .url-check-ignore | 20 ++ CONTRIBUTING.md | 2 +- docs/TROUBLESHOOTING.md | 2 +- mise.toml | 4 + .../skills/amazon-location-service/SKILL.md | 2 +- .../aws-serverless/skills/aws-lambda/SKILL.md | 2 +- .../aws-lambda/references/observability.md | 2 +- tools/validate-urls.py | 232 ++++++++++++++++++ 8 files changed, 261 insertions(+), 5 deletions(-) create mode 100644 .url-check-ignore create mode 100644 tools/validate-urls.py diff --git a/.url-check-ignore b/.url-check-ignore new file mode 100644 index 00000000..726331a2 --- /dev/null +++ b/.url-check-ignore @@ -0,0 +1,20 @@ +# URL patterns to skip during liveness checks. +# Each line is a substring match — if the URL contains this string, it's skipped. + +# Localhost / example / placeholder URLs +https://localhost +https://127.0.0.1 +https://example.com +https://your- +https://my- +https://myapp. + +# CloudFormation / SAM template variables in URLs +${AWS:: + +# API endpoints (not browsable) +https://maps.geo. +https://places.geo. +https://routes.geo. +https://geofencing.geo. +https://tracking.geo. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b1ff167e..0f346f2e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,7 +43,7 @@ To send us a pull request, please: 5. Send us a pull request, answering any default questions in the pull request interface. 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 
-GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). +GitHub provides additional documentation on [forking a repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) and [creating a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). ## Finding Contributions to Work On diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 15395003..06ca4d2c 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -189,7 +189,7 @@ GitHub Actions workflows can occasionally fail due to intermittent issues (netwo 1. Open the failed check from your pull request's **Checks** tab. 2. Click **Re-run failed jobs**. -See [Re-running workflows and jobs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs) for details. +See [Re-running workflows and jobs](https://docs.github.com/en/actions/how-tos/manage-workflow-runs/re-run-workflows-and-jobs) for details. 
#### Via the `gh` CLI diff --git a/mise.toml b/mise.toml index 8a5ba37a..560b2eb6 100644 --- a/mise.toml +++ b/mise.toml @@ -77,6 +77,10 @@ run = "uv run tools/validate-references.py" description = "Check SKILL.md sizes and flag extraction candidates" run = "uv run tools/validate-size.py" +[tasks."validate:urls"] +description = "Check HTTPS URLs in markdown files return 200 (network-dependent, not in build)" +run = "uv run tools/validate-urls.py" + [tasks.validate] description = "Run all validation checks" run = [ diff --git a/plugins/amazon-location-service/skills/amazon-location-service/SKILL.md b/plugins/amazon-location-service/skills/amazon-location-service/SKILL.md index 575a4cc9..2134915e 100644 --- a/plugins/amazon-location-service/skills/amazon-location-service/SKILL.md +++ b/plugins/amazon-location-service/skills/amazon-location-service/SKILL.md @@ -210,7 +210,7 @@ Integrates with the [AWS MCP Server](https://docs.aws.amazon.com/aws-mcp/latest/ - [Amazon Location Service Developer Guide](https://docs.aws.amazon.com/location/latest/developerguide/) - [Amazon Location Service API Reference](https://docs.aws.amazon.com/location/latest/APIReference/) -- [Amazon Location Service Samples Repository](https://github.com/aws-geospatial/amazon-location-samples) +- [Amazon Location Service Samples](https://github.com/aws-geospatial) ## Reference Files diff --git a/plugins/aws-serverless/skills/aws-lambda/SKILL.md b/plugins/aws-serverless/skills/aws-lambda/SKILL.md index 6572029e..9e074af2 100644 --- a/plugins/aws-serverless/skills/aws-lambda/SKILL.md +++ b/plugins/aws-serverless/skills/aws-lambda/SKILL.md @@ -166,6 +166,6 @@ When not specified, ALWAYS use CDK - [AWS SAM Documentation](https://docs.aws.amazon.com/serverless-application-model/) - [AWS Lambda Documentation](https://docs.aws.amazon.com/lambda/) -- [AWS Lambda Powertools](https://docs.powertools.aws.dev/lambda/) +- [AWS Lambda Powertools](https://docs.aws.amazon.com/powertools/) - [AWS CDK 
Documentation](https://docs.aws.amazon.com/cdk/) - [AWS Serverless MCP Server](https://github.com/awslabs/mcp/tree/main/src/aws-serverless-mcp-server) diff --git a/plugins/aws-serverless/skills/aws-lambda/references/observability.md b/plugins/aws-serverless/skills/aws-lambda/references/observability.md index cd6d1ac3..0726214f 100644 --- a/plugins/aws-serverless/skills/aws-lambda/references/observability.md +++ b/plugins/aws-serverless/skills/aws-lambda/references/observability.md @@ -198,7 +198,7 @@ Globals: OTEL_TRACES_SAMPLER: xray ``` -Layer ARNs vary by runtime and architecture. Check the [ADOT Lambda layer documentation](https://aws-otel.github.io/docs/getting-started/lambda) for the correct ARN for your runtime (Python, Node.js, Java, .NET) and architecture (amd64, arm64). +Layer ARNs vary by runtime and architecture. Check the [ADOT Lambda layer documentation](https://aws-otel.github.io/docs/getting-started/lambda/) for the correct ARN for your runtime (Python, Node.js, Java, .NET) and architecture (amd64, arm64). **SLO configuration** happens in the CloudWatch console or via API after deployment — define SLIs (latency percentile, error rate, availability) and set objectives with burn rate alerting. diff --git a/tools/validate-urls.py b/tools/validate-urls.py new file mode 100644 index 00000000..ae3c6e33 --- /dev/null +++ b/tools/validate-urls.py @@ -0,0 +1,232 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = ["httpx"] +# /// +"""Check that HTTPS URLs in markdown files are reachable. + +Scans all .md files for HTTPS URLs, checks them with HEAD (fallback GET), +and reports broken or redirected links. Network-dependent — not wired into +the main build pipeline. Run on-demand or in scheduled CI. + +Usage: + uv run tools/validate-urls.py [--strict] + +With --strict, 301 permanent redirects and 403 responses also fail. 
+""" + +import asyncio +import os +import re +import sys +from pathlib import Path +from urllib.parse import urldefrag + +ROOT = Path(__file__).resolve().parent.parent +IGNORE_FILE = ROOT / ".url-check-ignore" + +CONCURRENCY = 10 +TIMEOUT = 10.0 +RETRIES = 1 + +# Match HTTPS URLs in markdown (links, raw URLs, angle-bracket URLs) +URL_RE = re.compile(r"https://[^\s)<>\"'`\]]+") + +RED = "\033[91m" +YELLOW = "\033[93m" +GREEN = "\033[92m" +DIM = "\033[2m" +BOLD = "\033[1m" +RESET = "\033[0m" + + +def load_ignore_patterns() -> list[str]: + """Load URL prefixes/patterns to skip from .url-check-ignore.""" + if not IGNORE_FILE.exists(): + return [] + patterns = [] + for line in IGNORE_FILE.read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#"): + patterns.append(line) + return patterns + + +def should_ignore(url: str, patterns: list[str]) -> bool: + for pattern in patterns: + if pattern in url: + return True + return False + + +def collect_urls() -> dict[str, list[tuple[Path, int]]]: + """Scan all .md files and collect unique URLs with their locations.""" + url_locations: dict[str, list[tuple[Path, int]]] = {} + + for md_file in sorted(ROOT.rglob("*.md")): + # Skip hidden dirs, node_modules, .git, .tmp + parts = md_file.relative_to(ROOT).parts + if any(p.startswith(".") or p == "node_modules" for p in parts): + continue + + try: + text = md_file.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + continue + + for i, line in enumerate(text.splitlines(), 1): + for match in URL_RE.finditer(line): + raw_url = match.group(0) + # Strip trailing punctuation that's likely not part of the URL + raw_url = raw_url.rstrip(".,;:!?)") + # Strip fragment + url, _ = urldefrag(raw_url) + if url not in url_locations: + url_locations[url] = [] + url_locations[url].append((md_file, i)) + + return url_locations + + +async def check_url( + client: "httpx.AsyncClient", + url: str, + semaphore: asyncio.Semaphore, +) -> tuple[str, int | None, str 
| None]: + """Check a single URL. Returns (url, status_code, error_or_note).""" + import httpx + + async with semaphore: + for attempt in range(1 + RETRIES): + try: + # Try HEAD first + resp = await client.head(url, follow_redirects=True, timeout=TIMEOUT) + if resp.status_code == 405 or resp.status_code >= 500: + # Server rejects HEAD or is erroring — try GET + resp = await client.get(url, follow_redirects=True, timeout=TIMEOUT) + + # Check for permanent redirects in the redirect history + redirect_note = None + for r in resp.history: + if r.status_code == 301: + redirect_note = f"301 → {resp.url}" + break + + return (url, resp.status_code, redirect_note) + + except httpx.TimeoutException: + if attempt < RETRIES: + await asyncio.sleep(1) + continue + return (url, None, "timeout") + except (httpx.ConnectError, httpx.ReadError, httpx.RemoteProtocolError) as e: + if attempt < RETRIES: + await asyncio.sleep(1) + continue + return (url, None, str(type(e).__name__)) + except Exception as e: + return (url, None, str(e)) + + return (url, None, "unreachable") + + +async def main_async(strict: bool) -> int: + import httpx + + ignore_patterns = load_ignore_patterns() + url_locations = collect_urls() + + if not url_locations: + print(f"{GREEN}No HTTPS URLs found in markdown files.{RESET}") + return 0 + + # Filter out ignored URLs + urls_to_check = { + url: locs + for url, locs in url_locations.items() + if not should_ignore(url, ignore_patterns) + } + ignored_count = len(url_locations) - len(urls_to_check) + + print(f"{BOLD}URL Liveness Check{RESET}") + print(f"Found {len(url_locations)} unique URLs, checking {len(urls_to_check)}", end="") + if ignored_count: + print(f" ({ignored_count} ignored)", end="") + print("\n") + + # Add GitHub token if available + headers = {"User-Agent": "agent-plugins-url-checker/1.0"} + gh_token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if gh_token: + headers["Authorization"] = f"Bearer {gh_token}" + + semaphore = 
asyncio.Semaphore(CONCURRENCY) + + async with httpx.AsyncClient(headers=headers) as client: + tasks = [ + check_url(client, url, semaphore) + for url in sorted(urls_to_check.keys()) + ] + results = await asyncio.gather(*tasks) + + # Categorize results + broken: list[tuple[str, int | None, str | None]] = [] + warnings: list[tuple[str, int | None, str | None]] = [] + ok_count = 0 + + for url, status, note in results: + if status is None: + broken.append((url, status, note)) + elif status >= 400 and status != 403: + broken.append((url, status, note)) + elif status == 403: + if strict: + broken.append((url, status, note)) + else: + warnings.append((url, status, "403 Forbidden (may be WAF/bot protection)")) + elif note and note.startswith("301"): + if strict: + broken.append((url, status, note)) + else: + warnings.append((url, status, note)) + else: + ok_count += 1 + + # Report + if broken: + print(f"{RED}Broken URLs ({len(broken)}):{RESET}") + for url, status, note in broken: + status_str = str(status) if status else note or "error" + locs = urls_to_check[url] + first_loc = locs[0] + rel = first_loc[0].relative_to(ROOT) + extra = f" (+{len(locs)-1} more)" if len(locs) > 1 else "" + print(f" {RED}[{status_str}]{RESET} {url}") + print(f" {DIM}{rel}:{first_loc[1]}{extra}{RESET}") + print() + + if warnings: + print(f"{YELLOW}Warnings ({len(warnings)}):{RESET}") + for url, status, note in warnings: + locs = urls_to_check[url] + first_loc = locs[0] + rel = first_loc[0].relative_to(ROOT) + extra = f" (+{len(locs)-1} more)" if len(locs) > 1 else "" + print(f" {YELLOW}[{status}]{RESET} {url}") + print(f" {DIM}{note} — {rel}:{first_loc[1]}{extra}{RESET}") + print() + + print( + f"{BOLD}Summary:{RESET} {ok_count} ok, {len(warnings)} warnings," + f" {len(broken)} broken out of {len(urls_to_check)} checked" + ) + + return 1 if broken else 0 + + +def main() -> int: + strict = "--strict" in sys.argv + return asyncio.run(main_async(strict)) + + +if __name__ == "__main__": + 
sys.exit(main()) From 16b458f813497eebb3abc30d7932185262eef08d Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:47:27 -0500 Subject: [PATCH 3/6] fix(tools): address Copilot review feedback on PR #87 1. validate-references.py: Detect directory-style cross-skill links like `../aws-lambda-durable-functions/` by treating them as links to `../aws-lambda-durable-functions/SKILL.md`. These were previously silently skipped because the regexes required file extensions. 2. init-skill.py: Switch from str.format to string.Template to avoid crashes when description contains `{`/`}` characters. Template uses $-substitution which doesn't treat braces as special. 3. markdownlint-frontmatter.cjs: Load allowed properties from skill-frontmatter.schema.json at runtime instead of duplicating the list. Eliminates drift risk between the schema and the lint rule. Falls back to hardcoded set if schema can't be read. 4. validate-size.py: Carry skill_md Path through results tuple instead of re-scanning the skills list by name. Eliminates latent bug where duplicate skill names across plugins would match the wrong file. Dismissed: path traversal concern in validate-references.py. The tool is read-only and the all_resources set constrains output to files strictly under plugins/*/skills/*/references/. 
Co-Authored-By: Claude Opus 4.6 --- tools/init-skill.py | 13 ++++---- tools/markdownlint-frontmatter.cjs | 48 ++++++++++++++++++++---------- tools/validate-references.py | 5 ++++ tools/validate-size.py | 32 +++++++++----------- 4 files changed, 59 insertions(+), 39 deletions(-) diff --git a/tools/init-skill.py b/tools/init-skill.py index cccda2ee..8acd57d7 100644 --- a/tools/init-skill.py +++ b/tools/init-skill.py @@ -7,19 +7,20 @@ import re import sys from pathlib import Path +from string import Template ROOT = Path(__file__).resolve().parent.parent PLUGINS_DIR = ROOT / "plugins" NAME_RE = re.compile(r"^[a-z][a-z0-9-]*$") -SKILL_TEMPLATE = """\ +SKILL_TEMPLATE = Template("""\ --- -name: {name} +name: $name description: > - {description} + $description --- -# {title} +# $title ## When to Use @@ -40,7 +41,7 @@ ## References - `references/` — [FILL: Describe what detailed reference material is available.] -""" +""") def main() -> int: @@ -101,7 +102,7 @@ def main() -> int: title = skill_name.replace("-", " ").title() skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text( - SKILL_TEMPLATE.format(name=skill_name, description=description, title=title), + SKILL_TEMPLATE.substitute(name=skill_name, description=description, title=title), encoding="utf-8", ) refs_dir = skill_dir / "references" diff --git a/tools/markdownlint-frontmatter.cjs b/tools/markdownlint-frontmatter.cjs index f8198d51..4eca0689 100644 --- a/tools/markdownlint-frontmatter.cjs +++ b/tools/markdownlint-frontmatter.cjs @@ -12,22 +12,40 @@ "use strict"; const path = require("path"); +const fs = require("fs"); -// Allowed frontmatter properties per the skill-frontmatter schema -// Core spec properties + common extensions (license, metadata) -const ALLOWED_PROPERTIES = new Set([ - "name", - "description", - "context", - "agent", - "model", - "allowed-tools", - "argument-hint", - "user-invocable", - "disable-model-invocation", - "license", - "metadata", -]); +// Allowed frontmatter properties — 
derived from skill-frontmatter.schema.json +// at runtime to avoid drift between the schema and this rule. +function loadAllowedProperties() { + const fallback = new Set([ + "name", + "description", + "context", + "agent", + "model", + "allowed-tools", + "argument-hint", + "user-invocable", + "disable-model-invocation", + "license", + "metadata", + ]); + + try { + const schemaPath = path.resolve(__dirname, "..", "schemas", "skill-frontmatter.schema.json"); + // nosemgrep: gitlab.eslint.detect-non-literal-fs-filename + const schema = JSON.parse(fs.readFileSync(schemaPath, "utf8")); + if (schema && schema.properties && typeof schema.properties === "object") { + return new Set(Object.keys(schema.properties)); + } + } catch { + // Fall back to hardcoded set if schema can't be read + } + + return fallback; +} + +const ALLOWED_PROPERTIES = loadAllowedProperties(); module.exports = { names: ["skill-frontmatter", "SKILL002"], diff --git a/tools/validate-references.py b/tools/validate-references.py index a3f42eed..cff1fc39 100644 --- a/tools/validate-references.py +++ b/tools/validate-references.py @@ -21,6 +21,8 @@ # Patterns for extracting file references from markdown INLINE_CODE_RE = re.compile(rf"`([^`]+\.(?:{EXT}))`") MD_LINK_RE = re.compile(rf"\[(?:[^\]]*)\]\(([^)]+\.(?:{EXT}))\)") +# Directory links: [text](../some-skill/) — treated as linking to SKILL.md +MD_DIR_LINK_RE = re.compile(r"\[(?:[^\]]*)\]\((\.\./[^)]+/)\)") # Plain-text paths containing references/ (catches table cells, prose) # Negative lookbehind prevents matching partial paths inside markdown link hrefs # e.g. 
won't match "references/foo.md" from "../../other-skill/references/foo.md" @@ -39,6 +41,9 @@ def extract_refs(text: str) -> set[str]: for pattern in (INLINE_CODE_RE, MD_LINK_RE): refs.update(pattern.findall(text)) refs.update(PLAIN_REF_RE.findall(text)) + # Directory links like ../other-skill/ → treat as ../other-skill/SKILL.md + for dir_ref in MD_DIR_LINK_RE.findall(text): + refs.add(dir_ref + "SKILL.md") return refs diff --git a/tools/validate-size.py b/tools/validate-size.py index 20a8a987..84647d5c 100644 --- a/tools/validate-size.py +++ b/tools/validate-size.py @@ -87,7 +87,7 @@ def main() -> int: return 0 has_errors = False - results: list[tuple[str, str, int, int, str]] = [] + results: list[tuple[str, Path, int, int, str]] = [] for plugin_name, skill_name, skill_md in skills: text = skill_md.read_text(encoding="utf-8") @@ -105,7 +105,7 @@ def main() -> int: level = "ideal" label = f"{plugin_name}/{skill_name}" - results.append((label, skill_name, lines, tokens, level)) + results.append((label, skill_md, lines, tokens, level)) # Sort by line count descending results.sort(key=lambda r: r[2], reverse=True) @@ -135,26 +135,22 @@ def main() -> int: # Show extraction candidates for oversized skills oversized = [ - (label, skill_name) - for label, skill_name, lines, _, level in results + (label, skill_md) + for label, skill_md, _, _, level in results if level in ("error", "warning") ] if oversized: print(f"\n{BOLD}Extraction candidates:{RESET}") - for label, skill_name in oversized: - # Find the matching skill path - for plugin_name, sn, skill_md in skills: - if sn == skill_name: - text = skill_md.read_text(encoding="utf-8") - candidates = find_extraction_candidates(text) - if candidates: - print(f"\n {YELLOW}{label}{RESET}:") - for c in candidates: - print(f" → {c}") - break - - error_count = sum(1 for _, _, _, _, level in results if level == "error") - warn_count = sum(1 for _, _, _, _, level in results if level == "warning") + for label, skill_md in oversized: + 
text = skill_md.read_text(encoding="utf-8") + candidates = find_extraction_candidates(text) + if candidates: + print(f"\n {YELLOW}{label}{RESET}:") + for c in candidates: + print(f" → {c}") + + error_count = sum(1 for *_, level in results if level == "error") + warn_count = sum(1 for *_, level in results if level == "warning") print( f"\n{BOLD}Summary:{RESET} {len(results)} skills," f" {error_count} over {WARNING_MAX} (error)," From 7d10160df6e0ef463c74e549015bb4ffd3a29c9b Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:49:17 -0500 Subject: [PATCH 4/6] fix(tools): clamp resolve_ref candidates to repository root Belt-and-suspenders: reject any resolved path that escapes ROOT via symlinks or ../ traversal, even though the tool is read-only and output is already constrained by the all_resources set. Co-Authored-By: Claude Opus 4.6 --- tools/validate-references.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tools/validate-references.py b/tools/validate-references.py index cff1fc39..c59d2adc 100644 --- a/tools/validate-references.py +++ b/tools/validate-references.py @@ -61,27 +61,42 @@ def find_skill_root(source_file: Path) -> Path | None: return None +def _is_under_root(resolved: Path) -> bool: + """Reject candidates that escape the repository root.""" + try: + resolved.relative_to(ROOT) + return True + except ValueError: + return False + + def resolve_ref(ref: str, source_file: Path) -> list[Path]: - """Resolve a reference path to candidate file paths.""" + """Resolve a reference path to candidate file paths. + + All candidates are clamped to ROOT — paths that escape the repository + via symlinks or ../ traversal are silently dropped. 
+ """ # Skip URLs, mailto, fragment-only links if ref.startswith(("http://", "https://", "mailto:", "#")): return [] - candidates = [] + candidates: list[Path] = [] + + def _add(raw: Path) -> None: + resolved = raw.resolve() + if _is_under_root(resolved) and resolved not in candidates: + candidates.append(resolved) + # Try relative to the source file's directory - candidates.append((source_file.parent / ref).resolve()) + _add(source_file.parent / ref) # Also try relative to the skill root skill_root = find_skill_root(source_file) if skill_root: - from_root = (skill_root / ref).resolve() - if from_root not in candidates: - candidates.append(from_root) + _add(skill_root / ref) # Try relative to the skill's references/ directory # Handles convention where files in references/ use short paths like # `design-refs/foo.md` meaning `references/design-refs/foo.md` - from_refs = (skill_root / "references" / ref).resolve() - if from_refs not in candidates: - candidates.append(from_refs) + _add(skill_root / "references" / ref) return candidates From 89ba0e93ee7bb514b568c05172582770b2080eb9 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 10:16:56 -0500 Subject: [PATCH 5/6] security(tools): scope GitHub token to GitHub domains only, harden init-skill Critical fix: validate-urls.py was attaching GITHUB_TOKEN as a global Authorization header on all outbound requests. An attacker-controlled URL in a PR markdown file would exfiltrate the CI token. Now the token is only sent to github.com, api.github.com, and raw.githubusercontent.com via per-request headers. Additional hardening: - init-skill.py: Reject plugin names containing path traversal by verifying the resolved path is under PLUGINS_DIR. - init-skill.py: Switch to safe_substitute to handle $ in descriptions. - validate-urls.py: Pin httpx to >=0.28,<1 instead of unpinned. 
Co-Authored-By: Claude Opus 4.6 --- tools/init-skill.py | 9 ++++++--- tools/validate-urls.py | 27 ++++++++++++++++++--------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/init-skill.py b/tools/init-skill.py index 8acd57d7..1fa483ac 100644 --- a/tools/init-skill.py +++ b/tools/init-skill.py @@ -57,8 +57,11 @@ def main() -> int: skill_name = sys.argv[2] description = sys.argv[3] - # Validate plugin exists - plugin_dir = PLUGINS_DIR / plugin_name + # Validate plugin exists and is under PLUGINS_DIR (no path traversal) + plugin_dir = (PLUGINS_DIR / plugin_name).resolve() + if not plugin_dir.is_relative_to(PLUGINS_DIR.resolve()): + print(f"Error: plugin name must not contain path traversal: '{plugin_name}'") + return 1 if not plugin_dir.is_dir(): print(f"Error: plugin '{plugin_name}' not found at {plugin_dir}") available = sorted( @@ -102,7 +105,7 @@ def main() -> int: title = skill_name.replace("-", " ").title() skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text( - SKILL_TEMPLATE.substitute(name=skill_name, description=description, title=title), + SKILL_TEMPLATE.safe_substitute(name=skill_name, description=description, title=title), encoding="utf-8", ) refs_dir = skill_dir / "references" diff --git a/tools/validate-urls.py b/tools/validate-urls.py index ae3c6e33..ae938f0c 100644 --- a/tools/validate-urls.py +++ b/tools/validate-urls.py @@ -1,6 +1,6 @@ # /// script # requires-python = ">=3.12" -# dependencies = ["httpx"] +# dependencies = ["httpx>=0.28,<1"] # /// """Check that HTTPS URLs in markdown files are reachable. @@ -91,18 +91,28 @@ async def check_url( client: "httpx.AsyncClient", url: str, semaphore: asyncio.Semaphore, + gh_token: str | None = None, + gh_hosts: tuple[str, ...] = (), ) -> tuple[str, int | None, str | None]: """Check a single URL. 
Returns (url, status_code, error_or_note).""" import httpx + from urllib.parse import urlparse + + # Only attach auth header for GitHub domains — never leak tokens to third parties + req_headers: dict[str, str] = {} + if gh_token: + host = urlparse(url).hostname or "" + if any(host == h or host.endswith("." + h) for h in gh_hosts): + req_headers["Authorization"] = f"Bearer {gh_token}" async with semaphore: for attempt in range(1 + RETRIES): try: # Try HEAD first - resp = await client.head(url, follow_redirects=True, timeout=TIMEOUT) + resp = await client.head(url, headers=req_headers, follow_redirects=True, timeout=TIMEOUT) if resp.status_code == 405 or resp.status_code >= 500: # Server rejects HEAD or is erroring — try GET - resp = await client.get(url, follow_redirects=True, timeout=TIMEOUT) + resp = await client.get(url, headers=req_headers, follow_redirects=True, timeout=TIMEOUT) # Check for permanent redirects in the redirect history redirect_note = None @@ -153,17 +163,16 @@ async def main_async(strict: bool) -> int: print(f" ({ignored_count} ignored)", end="") print("\n") - # Add GitHub token if available - headers = {"User-Agent": "agent-plugins-url-checker/1.0"} + # GitHub token for rate limit avoidance — ONLY sent to GitHub domains + base_headers = {"User-Agent": "agent-plugins-url-checker/1.0"} gh_token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") - if gh_token: - headers["Authorization"] = f"Bearer {gh_token}" + gh_hosts = ("github.com", "api.github.com", "raw.githubusercontent.com") semaphore = asyncio.Semaphore(CONCURRENCY) - async with httpx.AsyncClient(headers=headers) as client: + async with httpx.AsyncClient(headers=base_headers) as client: tasks = [ - check_url(client, url, semaphore) + check_url(client, url, semaphore, gh_token=gh_token, gh_hosts=gh_hosts) for url in sorted(urls_to_check.keys()) ] results = await asyncio.gather(*tasks) From 9ac56b9cac35a824d832ebc108286372c9b934a5 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon 
<9553966+theagenticguy@users.noreply.github.com> Date: Wed, 11 Mar 2026 10:32:36 -0500 Subject: [PATCH 6/6] fix(tools): add uv to mise.toml managed tools CI was failing because uv is not available in the GitHub Actions runner environment. Add uv as a mise-managed tool so `mise install` provisions it alongside node, markdownlint, etc. Co-Authored-By: Claude Opus 4.6 --- mise.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/mise.toml b/mise.toml index 560b2eb6..7c7d56a3 100644 --- a/mise.toml +++ b/mise.toml @@ -4,6 +4,7 @@ min_version = "2026.2.4" [tools] +uv = "0.10" node = "24" "npm:markdownlint-cli2" = "0.17" "npm:ajv-cli" = "5"