66 changes: 66 additions & 0 deletions .github/workflows/test_links.yml
@@ -0,0 +1,66 @@
name: Link Check and Automated Issue

on:
workflow_dispatch:

jobs:
check-links:
runs-on: ubuntu-latest
outputs:
should_create_issue: ${{ steps.detect.outputs.issue_needed }}
steps:
- name: Checkout repository
uses: actions/checkout@v4.2.2

      # Run the link checker and capture its log
- name: Run LinkChecker
run: |
bazel run //:link_check > linkcheck_output.txt
continue-on-error: true

      # Parse the linkcheck log and generate the issue body
- name: Parse broken links and generate issue body
run: |
python3 scripts/link_parser.py linkcheck_output.txt

# Check if issue_body.md exists and is not empty
- name: Check for issues to report
id: detect
run: |
if [ -s issue_body.md ]; then
echo "issue_needed=true" >> "$GITHUB_OUTPUT"
else
echo "issue_needed=false" >> "$GITHUB_OUTPUT"
fi

# Upload issue body artifact if present
- name: Upload issue body
if: steps.detect.outputs.issue_needed == 'true'
uses: actions/upload-artifact@v4
with:
name: issue-body
path: issue_body.md

create-issue:
needs: check-links
if: needs.check-links.outputs.should_create_issue == 'true'
runs-on: ubuntu-latest
steps:
- name: Download issue body artifact
uses: actions/download-artifact@v4
with:
name: issue-body

- name: Create GitHub issue from findings
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const fs = require('fs');
const body = fs.readFileSync('issue_body.md', 'utf-8');
            await github.rest.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: "Automated Issue: Broken Documentation Links",
body,
});
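
For local debugging, the two check-links steps above can be reproduced roughly as follows. This is only a sketch: it assumes Bazel is installed and the //:link_check target added in docs.bzl below is available in the working copy; it is not part of the workflow itself.

# Rough local equivalent of the "Run LinkChecker" and "Parse broken links" steps.
# Assumes execution from the repository root.
import os
import subprocess

with open("linkcheck_output.txt", "w") as out:
    # Mirrors `bazel run //:link_check > linkcheck_output.txt`; the workflow uses
    # continue-on-error, so a non-zero exit code is tolerated here as well.
    subprocess.run(["bazel", "run", "//:link_check"], stdout=out)

subprocess.run(["python3", "scripts/link_parser.py", "linkcheck_output.txt"], check=True)

# The "detect" step then only asks whether issue_body.md exists and is non-empty.
issue_needed = os.path.exists("issue_body.md") and os.path.getsize("issue_body.md") > 0
print(f"issue_needed={str(issue_needed).lower()}")
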
13 changes: 13 additions & 0 deletions docs.bzl
@@ -128,6 +128,19 @@ def docs(source_dir = "docs", data = [], deps = []):
},
)

py_binary(
name = "link_check",
tags = ["cli_help=Verify Links inside Documentation:\nbazel run //:link_check\n (Note: this could take a long time)"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "linkcheck",
},
)

py_binary(
name = "docs_check",
tags = ["cli_help=Verify documentation:\nbazel run //:docs_check"],
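
The env block of the new link_check target is what connects it to incremental.py, shown further down. Only get_env and the ACTION-to-builder mapping are visible in this diff, so the following is a hedged sketch of how those variables are plausibly consumed, not code taken from the repository.

# Illustrative only: how incremental.py presumably reads the env values set by
# the link_check target. get_env exists in incremental.py, but its body is not
# part of this diff, so this implementation is an assumption.
import os

def get_env(name: str) -> str:
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"Required environment variable {name} is not set")
    return value

source_directory = get_env("SOURCE_DIRECTORY")  # "docs" by default
action = get_env("ACTION")                      # "linkcheck" for this target
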
100 changes: 100 additions & 0 deletions scripts/link_parser.py
@@ -0,0 +1,100 @@
import argparse
import re
import sys
from dataclasses import dataclass

PARSING_STATUSES = ["broken"]


@dataclass
class BrokenLink:
location: str
line_nr: str
url: str
status: str
reasoning: str


def parse_broken_links(log: str) -> list[BrokenLink]:
    """Parse ``(docname: line N) broken <url> - <reason>`` lines from a Sphinx linkcheck log."""
    broken_links: list[BrokenLink] = []
    lines = log.strip().split("\n")

for line in lines:
parts = line.split(") ")
if len(parts) < 2:
continue

location_part = parts[0].replace("(", "").strip()
location = location_part.split(":")[0].strip()
line_nr = location_part.split("line")[-1].strip()
status_and_url_part = parts[1]

if not any(status in status_and_url_part for status in PARSING_STATUSES):
continue
        status_and_url = status_and_url_part.split(" - ")
        if len(status_and_url) < 2:
            continue
        status_and_url_tokens = status_and_url[0].split()
        if len(status_and_url_tokens) != 2:
            # Unexpected shape (e.g. "broken" appeared inside the URL or reason); skip the line
            continue
        status, url = status_and_url_tokens
        reasoning = status_and_url[1].strip()

broken_links.append(
BrokenLink(
location=location,
line_nr=line_nr,
url=url,
status=status,
reasoning=reasoning,
)
)

return broken_links


def generate_markdown_table(broken_links: list[BrokenLink]) -> str:
table = "| Location | Line Number | URL | Status | Reasoning |\n"
table += "|----------|-------------|-----|--------|-----------|\n"

for link in broken_links:
table += (
f"| {link.location} | {link.line_nr} | "
f"{link.url} | {link.status} | {link.reasoning} |\n"
)

return table


def generate_issue_body(broken_links: list[BrokenLink]) -> str:
markdown_table = generate_markdown_table(broken_links)
return f"""
# Broken Links Report
The following broken links were detected in the documentation:
{markdown_table}
Please investigate and fix these issues to ensure all links are functional.
Thank you!
"""


def strip_ansi_codes(text: str) -> str:
"""Remove ANSI escape sequences from text"""
ansi_escape = re.compile(r"\x1b\[[0-9;]*m")
return ansi_escape.sub("", text)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parse broken links from Sphinx log and generate issue body."
    )
    parser.add_argument("logfile", type=str, help="Path to the Sphinx log file.")
    args = parser.parse_args()
with open(args.logfile) as f:
log_content_raw = f.read()
log_content = strip_ansi_codes(log_content_raw)
broken_links = parse_broken_links(log_content)
    if not broken_links:
        # Nothing broken was found, so exit early without writing an issue body
        sys.exit(0)
    issue_body = generate_issue_body(broken_links)
    with open("issue_body.md", "w") as out:
        out.write(issue_body)
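
As a quick sanity check, the parser can be exercised on a single line in the shape it expects, ``(docname: line N) broken <url> - <reason>``, which matches the Sphinx linkcheck console output. The snippet below assumes it is run from the repository root so that scripts/link_parser.py is importable; the sample URL and error text are made up.

# Feed one representative linkcheck line through the parser and render the issue body.
import sys

sys.path.insert(0, "scripts")  # assumes execution from the repository root
from link_parser import parse_broken_links, generate_issue_body

sample_log = "(index: line 12) broken    https://example.com/dead - 404 Client Error: Not Found"
links = parse_broken_links(sample_log)
# links[0] == BrokenLink(location='index', line_nr='12',
#                        url='https://example.com/dead', status='broken',
#                        reasoning='404 Client Error: Not Found')
print(generate_issue_body(links))
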
2 changes: 2 additions & 0 deletions src/incremental.py
@@ -109,6 +109,8 @@ def get_env(name: str) -> str:
builder = "html"
elif action == "check":
builder = "needs"
elif action == "linkcheck":
builder = "linkcheck"
else:
raise ValueError(f"Unknown action: {action}")

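
The new linkcheck branch only selects the builder name; the actual Sphinx invocation sits outside this hunk. Below is a minimal sketch of how the selected builder typically reaches Sphinx, assuming a plain sphinx-build call; the real incremental.py may drive Sphinx differently.

# Hypothetical call site: not taken from incremental.py, shown only to illustrate
# what builder = "linkcheck" ultimately means for the Sphinx run.
import subprocess

def run_sphinx(builder: str, source_dir: str, out_dir: str) -> int:
    # e.g. `sphinx-build -b linkcheck docs _build/linkcheck` checks every external
    # link and prints "(docname: line N) broken <url> - <reason>" lines for failures.
    cmd = ["sphinx-build", "-b", builder, source_dir, out_dir]
    return subprocess.run(cmd).returncode
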