diff --git a/.github/workflows/test_links.yml b/.github/workflows/test_links.yml
new file mode 100644
index 000000000..e8d382c33
--- /dev/null
+++ b/.github/workflows/test_links.yml
@@ -0,0 +1,66 @@
+name: Link Check and Automated Issue
+
+on:
+  workflow_dispatch:
+
+jobs:
+  check-links:
+    runs-on: ubuntu-latest
+    outputs:
+      should_create_issue: ${{ steps.detect.outputs.issue_needed }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4.2.2
+
+      # Run the link checker and generate the log (best-effort: a broken
+      # link must not fail the workflow, it becomes an issue instead)
+      - name: Run LinkChecker
+        run: |
+          bazel run //:link_check > linkcheck_output.txt
+        continue-on-error: true
+
+      # Parse the linkcheck log and generate the issue body
+      - name: Parse broken links and generate issue body
+        run: |
+          python3 scripts/link_parser.py linkcheck_output.txt
+
+      # Check if issue_body.md exists and is not empty
+      - name: Check for issues to report
+        id: detect
+        run: |
+          if [ -s issue_body.md ]; then
+            echo "issue_needed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "issue_needed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Upload issue body artifact if present
+      - name: Upload issue body
+        if: steps.detect.outputs.issue_needed == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: issue-body
+          path: issue_body.md
+
+  create-issue:
+    needs: check-links
+    if: needs.check-links.outputs.should_create_issue == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download issue body artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: issue-body
+
+      - name: Create GitHub issue from findings
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('issue_body.md', 'utf-8');
+            github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title: "Automated Issue: Broken Documentation Links",
+              body,
+            });
diff --git a/docs.bzl b/docs.bzl
index 00f1c676c..9860bd664 100644
--- a/docs.bzl
+++ b/docs.bzl
@@ -128,6 +128,19 @@ def docs(source_dir = "docs", data = [], deps = []):
         },
     )
 
+    py_binary(
+        name = "link_check",
+        tags = ["cli_help=Verify Links inside Documentation:\nbazel run //:link_check\n (Note: this could take a long time)"],
+        srcs = ["@score_docs_as_code//src:incremental.py"],
+        data = data,
+        deps = deps,
+        env = {
+            "SOURCE_DIRECTORY": source_dir,
+            "DATA": str(data),
+            "ACTION": "linkcheck",
+        },
+    )
+
     py_binary(
         name = "docs_check",
         tags = ["cli_help=Verify documentation:\nbazel run //:docs_check"],
diff --git a/scripts/link_parser.py b/scripts/link_parser.py
new file mode 100644
index 000000000..7c20a5e55
--- /dev/null
+++ b/scripts/link_parser.py
@@ -0,0 +1,120 @@
+"""Parse Sphinx linkcheck output and generate a Markdown issue body.
+
+Reads a linkcheck log file, extracts entries whose status is listed in
+PARSING_STATUSES, and writes issue_body.md when broken links are found.
+"""
+
+import argparse
+import re
+import sys
+from dataclasses import dataclass
+
+# Only log entries with one of these statuses are reported.
+PARSING_STATUSES = ["broken"]
+
+
+@dataclass
+class BrokenLink:
+    """One broken-link finding parsed from the linkcheck log."""
+
+    location: str  # source document path
+    line_nr: str  # line number within the document (kept as text)
+    url: str  # the unreachable URL
+    status: str  # linkcheck status, e.g. "broken"
+    reasoning: str  # human-readable failure reason
+
+
+def parse_broken_links(log: str) -> list[BrokenLink]:
+    """Extract BrokenLink records from linkcheck output.
+
+    Expected line shape: "(doc: line N) broken URL - reason".
+    Lines that do not match this shape are skipped silently.
+    """
+    broken_links: list[BrokenLink] = []
+    lines = log.strip().split("\n")
+
+    for line in lines:
+        parts = line.split(") ")
+        if len(parts) < 2:
+            continue
+
+        location_part = parts[0].replace("(", "").strip()
+        location = location_part.split(":")[0].strip()
+        line_nr = location_part.split("line")[-1].strip()
+        status_and_url_part = parts[1]
+
+        if not any(status in status_and_url_part for status in PARSING_STATUSES):
+            continue
+        status_and_url = status_and_url_part.split(" - ")
+        if len(status_and_url) < 2:
+            continue
+        # "status URL" must be exactly two tokens; skip malformed lines
+        # instead of crashing the whole run on an unpacking error.
+        status_url_tokens = status_and_url[0].split()
+        if len(status_url_tokens) != 2:
+            continue
+        status, url = status_url_tokens
+        reasoning = status_and_url[1].strip()
+
+        broken_links.append(
+            BrokenLink(
+                location=location,
+                line_nr=line_nr,
+                url=url,
+                status=status,
+                reasoning=reasoning,
+            )
+        )
+
+    return broken_links
+
+
+def generate_markdown_table(broken_links: list[BrokenLink]) -> str:
+    """Render the findings as a GitHub-flavored Markdown table."""
+    table = "| Location | Line Number | URL | Status | Reasoning |\n"
+    table += "|----------|-------------|-----|--------|-----------|\n"
+
+    for link in broken_links:
+        table += (
+            f"| {link.location} | {link.line_nr} | "
+            f"{link.url} | {link.status} | {link.reasoning} |\n"
+        )
+
+    return table
+
+
+def generate_issue_body(broken_links: list[BrokenLink]) -> str:
+    """Build the full Markdown body for the automated GitHub issue."""
+    markdown_table = generate_markdown_table(broken_links)
+    return f"""
+# Broken Links Report
+The following broken links were detected in the documentation:
+{markdown_table}
+Please investigate and fix these issues to ensure all links are functional.
+Thank you!
+"""
+
+
+def strip_ansi_codes(text: str) -> str:
+    """Remove ANSI SGR escape sequences (colors) from text."""
+    ansi_escape = re.compile(r"\x1b\[[0-9;]*m")
+    return ansi_escape.sub("", text)
+
+
+if __name__ == "__main__":
+    # Named "parser", not "argparse", so the module is not shadowed.
+    parser = argparse.ArgumentParser(
+        description="Parse broken links from Sphinx log and generate issue body."
+    )
+    parser.add_argument("logfile", type=str, help="Path to the Sphinx log file.")
+    args = parser.parse_args()
+    with open(args.logfile) as f:
+        log_content_raw = f.read()
+    log_content = strip_ansi_codes(log_content_raw)
+    broken_links = parse_broken_links(log_content)
+    if not broken_links:
+        # Nothing broken found, can exit early
+        sys.exit(0)
+    issue_body = generate_issue_body(broken_links)
+    with open("issue_body.md", "w") as out:
+        out.write(issue_body)
diff --git a/src/incremental.py b/src/incremental.py
index fbabf8b1a..1c3816229 100644
--- a/src/incremental.py
+++ b/src/incremental.py
@@ -109,6 +109,8 @@ def get_env(name: str) -> str:
         builder = "html"
     elif action == "check":
         builder = "needs"
+    elif action == "linkcheck":
+        builder = "linkcheck"
     else:
         raise ValueError(f"Unknown action: {action}")
 