From f27511a5364aae33da4d24ef2ea1683996ac5522 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:47:24 +0100 Subject: [PATCH 01/22] Add GitHub Action for LanguageTool on PRs --- .github/workflows/languagetool-pr.yml | 77 +++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/languagetool-pr.yml diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml new file mode 100644 index 00000000..f526a8d5 --- /dev/null +++ b/.github/workflows/languagetool-pr.yml @@ -0,0 +1,77 @@ +name: LanguageTool (PR) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +jobs: + languagetool: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "17" + + - name: Download LanguageTool + run: | + set -euo pipefail + LT_VERSION="6.4" + curl -fsSL -o LT.zip "https://languagetool.org/download/LanguageTool-${LT_VERSION}.zip" + unzip -q LT.zip + echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" + + - name: Run LanguageTool on changed files + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + + echo "Base: $BASE_SHA" + echo "Head: $HEAD_SHA" + + # Adjust file types as you like: + mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ + | grep -E '\.(md|rst|txt)$' || true) + + if [ "${#FILES[@]}" -eq 0 ]; then + echo "No matching files changed. Skipping." + exit 0 + fi + + echo "Files to check:" + printf ' - %s\n' "${FILES[@]}" + + JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)" + + # Pick your language here: + LANG="en-US" + + issues=0 + for f in "${FILES[@]}"; do + echo "-----" + echo "Checking: $f" + # LanguageTool returns 0 even with matches, so we count output ourselves. + out="$(java -jar "$JAR" -l "$LANG" "$f" || true)" + if [ -n "$out" ]; then + issues=1 + echo "$out" + else + echo "OK" + fi + done + + if [ "$issues" -ne 0 ]; then + echo "LanguageTool found issues." + exit 1 + fi + + echo "No LanguageTool issues found." From 650be5834d65ffbdc791f8c84f0079ac9b698160 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:47:50 +0100 Subject: [PATCH 02/22] Add support for .mdx files in language tool workflow --- .github/workflows/languagetool-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index f526a8d5..27ede8bd 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -40,7 +40,7 @@ jobs: # Adjust file types as you like: mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|rst|txt)$' || true) + | grep -E '\.(md|rst|txt|mdx)$' || true) if [ "${#FILES[@]}" -eq 0 ]; then echo "No matching files changed. Skipping." 
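For reference, the check introduced in PATCH 01 (and extended to .mdx in PATCH 02) can be reproduced locally with the same LanguageTool 6.4 command-line build the workflow downloads. This is a minimal sketch under the workflow's own assumptions (Java 17 available on PATH); the file path docs/example.md is a hypothetical stand-in for whichever changed file you want to check:

    # Download and unpack the same release the workflow uses
    curl -fsSL -o LT.zip "https://languagetool.org/download/LanguageTool-6.4.zip"
    unzip -q LT.zip

    # The CLI exits 0 even when it reports findings, so inspect the output
    # itself — exactly what the workflow's "issues" counter does.
    java -jar LanguageTool-6.4/languagetool-commandline.jar -l en-US docs/example.md
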
From dc77791ef81a5fc3ef6ee3196750de2439a0871f Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:51:21 +0100 Subject: [PATCH 03/22] Delete .github/workflows/deploy.yml --- .github/workflows/deploy.yml | 43 ------------------------------------ 1 file changed, 43 deletions(-) delete mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml deleted file mode 100644 index f9fa42a3..00000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Deploy to GitHub Pages - -on: - # Trigger the workflow every time you push to the `main` branch - # Using a different branch name? Replace `main` with your branch’s name - push: - branches: [ main ] - # Allows you to run this workflow manually from the Actions tab on GitHub. - workflow_dispatch: - -# Allow this job to clone the repo and create a page deployment -permissions: - contents: read - pages: write - id-token: write - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Checkout your repository using git - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Install, build, and upload your site - uses: withastro/action@v3 - with: - path: . # The root location of your Astro project inside the repository. (optional) - node-version: 20 # The specific version of Node that should be used to build your site. Defaults to 20. (optional) - package-manager: npm # The Node package manager that should be used to install dependencies and build your site. Automatically detected based on your lockfile. (optional) - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - deploy: - needs: build - runs-on: ubuntu-latest - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 From c00b21e6dcc5301ac7cb19a8a57d44439878776f Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 18:58:38 +0100 Subject: [PATCH 04/22] Refactor LanguageTool workflow for clarity and efficiency Updated the LanguageTool GitHub Actions workflow to improve file type checks and output handling. --- .github/workflows/languagetool-pr.yml | 36 +++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index 27ede8bd..bea33e75 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -28,7 +28,7 @@ jobs: unzip -q LT.zip echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" - - name: Run LanguageTool on changed files + - name: Run LanguageTool on changed PR files (cleaned) env: BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} @@ -38,9 +38,9 @@ jobs: echo "Base: $BASE_SHA" echo "Head: $HEAD_SHA" - # Adjust file types as you like: + # File types to check (add/remove as needed) mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|rst|txt|mdx)$' || true) + | grep -E '\.(md|mdx|rst|txt)$' || true) if [ "${#FILES[@]}" -eq 0 ]; then echo "No matching files changed. Skipping." 
@@ -51,16 +51,38 @@ jobs: printf ' - %s\n' "${FILES[@]}" JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)" - - # Pick your language here: LANG="en-US" issues=0 + for f in "${FILES[@]}"; do echo "-----" echo "Checking: $f" - # LanguageTool returns 0 even with matches, so we count output ourselves. - out="$(java -jar "$JAR" -l "$LANG" "$f" || true)" + + tmp="$(mktemp)" + + # Keep "nearly all" errors but remove the biggest source of noise: + # - YAML frontmatter at top of file (--- ... ---) + # - fenced code blocks (``` ... ```) + # + # Everything else is checked (including normal prose in MDX). + awk ' + BEGIN { fm=0; code=0; } + NR==1 && $0=="---" { fm=1; next } + fm==1 && $0=="---" { fm=0; next } + fm==1 { next } + + /^```/ { code = !code; next } + code==1 { next } + + { print } + ' "$f" > "$tmp" + + # LanguageTool CLI prints findings to stdout; it typically exits 0 even with findings, + # so we detect findings by whether output is non-empty. + out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)" + rm -f "$tmp" + if [ -n "$out" ]; then issues=1 echo "$out" From 072c78e347a43c9dc7fae3a572e7855c0ea03a6b Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:07:17 +0100 Subject: [PATCH 05/22] Update languagetool-pr.yml --- .github/workflows/languagetool-pr.yml | 115 ++++++++++++++++++++------ 1 file changed, 91 insertions(+), 24 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index bea33e75..1865d70f 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -4,6 +4,10 @@ on: pull_request: types: [opened, synchronize, reopened, ready_for_review] +permissions: + contents: read + pull-requests: write + jobs: languagetool: runs-on: ubuntu-latest @@ -28,17 +32,19 @@ jobs: unzip -q LT.zip echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" - - name: Run LanguageTool on changed PR files (cleaned) + - name: Run LanguageTool on changed PR files + comment summary env: BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + REPO: ${{ github.repository }} run: | set -euo pipefail echo "Base: $BASE_SHA" echo "Head: $HEAD_SHA" - # File types to check (add/remove as needed) mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ | grep -E '\.(md|mdx|rst|txt)$' || true) @@ -54,6 +60,8 @@ jobs: LANG="en-US" issues=0 + REPORT_FILE="$(mktemp)" + : > "$REPORT_FILE" for f in "${FILES[@]}"; do echo "-----" @@ -61,39 +69,98 @@ jobs: tmp="$(mktemp)" - # Keep "nearly all" errors but remove the biggest source of noise: - # - YAML frontmatter at top of file (--- ... ---) - # - fenced code blocks (``` ... ```) - # - # Everything else is checked (including normal prose in MDX). - awk ' - BEGIN { fm=0; code=0; } - NR==1 && $0=="---" { fm=1; next } - fm==1 && $0=="---" { fm=0; next } - fm==1 { next } - - /^```/ { code = !code; next } - code==1 { next } - - { print } - ' "$f" > "$tmp" - - # LanguageTool CLI prints findings to stdout; it typically exits 0 even with findings, - # so we detect findings by whether output is non-empty. + # Robust preprocessing (won't fail the job if it errors; falls back to original file) + if ! 
python3 - "$f" > "$tmp" 2>/dev/null << 'PY' +import re, sys +path = sys.argv[1] +text = open(path, "r", encoding="utf-8", errors="replace").read() + +# Remove YAML frontmatter at top +if text.startswith("---\n"): + m = re.match(r"^---\n.*?\n---\n", text, flags=re.S) + if m: + text = text[m.end():] + +# Remove fenced code blocks +text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M) + +# Remove inline code spans +text = re.sub(r"`[^`]*`", " ", text) + +# Neutralize common technical tokens (paths, filenames/exts, long identifiers) +text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text) + +exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)" +text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I) + +text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text) + +text = re.sub(r"[ \t]+", " ", text) +sys.stdout.write(text) +PY + then + cp "$f" "$tmp" + fi + out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)" rm -f "$tmp" if [ -n "$out" ]; then issues=1 echo "$out" + { + echo "FILE: $f" + echo "$out" + echo + } >> "$REPORT_FILE" else echo "OK" fi done + # Build PR comment body (upsert by marker) + MARKER="" + if [ "$issues" -ne 0 ]; then - echo "LanguageTool found issues." - exit 1 + BODY_FILE="$(mktemp)" + { + echo "$MARKER" + echo "### LanguageTool findings" + echo + echo "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._" + echo + echo '```' + cat "$REPORT_FILE" + echo '```' + } > "$BODY_FILE" + + # Find existing comment with marker (if any) and update it; otherwise create a new one + COMMENTS_JSON="$(mktemp)" + gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON" + + COMMENT_ID="$(python3 - << 'PY' +import json, sys +data = json.load(open(sys.argv[1], "r", encoding="utf-8")) +for c in data: + if "" in (c.get("body") or ""): + print(c["id"]) + break +PY + "$COMMENTS_JSON")" + + if [ -n "${COMMENT_ID:-}" ]; then + gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null + echo "Updated existing LanguageTool comment." + else + gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null + echo "Posted new LanguageTool comment." + fi + else + echo "No LanguageTool issues found." fi - echo "No LanguageTool issues found." 
+ rm -f "$REPORT_FILE" || true + + if [ "$issues" -ne 0 ]; then + exit 1 + fi From 2c9d531234cd5ce14bacda72703df0658f0f4eb4 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:10:47 +0100 Subject: [PATCH 06/22] Update languagetool-pr.yml --- .github/workflows/languagetool-pr.yml | 103 ++++++++++++++++---------- 1 file changed, 64 insertions(+), 39 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index 1865d70f..b21a5321 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -1,8 +1,10 @@ -name: LanguageTool (PR) +name: LanguageTool on: pull_request: types: [opened, synchronize, reopened, ready_for_review] + push: + branches: ["**"] permissions: contents: read @@ -32,20 +34,35 @@ jobs: unzip -q LT.zip echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" - - name: Run LanguageTool on changed PR files + comment summary + - name: Run LanguageTool (changed files) and comment on PR env: - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} REPO: ${{ github.repository }} + EVENT_NAME: ${{ github.event_name }} + PR_NUMBER: ${{ github.event.pull_request.number }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | set -euo pipefail - echo "Base: $BASE_SHA" - echo "Head: $HEAD_SHA" + # Only comment when this run is for a PR event + IS_PR=0 + if [ "$EVENT_NAME" = "pull_request" ]; then + IS_PR=1 + fi + + if [ "$IS_PR" -eq 1 ]; then + echo "Base: $BASE_SHA" + echo "Head: $HEAD_SHA" + DIFF_BASE="$BASE_SHA" + DIFF_HEAD="$HEAD_SHA" + else + echo "Push run: comparing against previous commit" + DIFF_BASE="${GITHUB_SHA}^" + DIFF_HEAD="${GITHUB_SHA}" + fi - mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ + mapfile -t FILES < <(git diff --name-only "$DIFF_BASE" "$DIFF_HEAD" \ | grep -E '\.(md|mdx|rst|txt)$' || true) if [ "${#FILES[@]}" -eq 0 ]; then @@ -69,17 +86,17 @@ jobs: tmp="$(mktemp)" - # Robust preprocessing (won't fail the job if it errors; falls back to original file) - if ! python3 - "$f" > "$tmp" 2>/dev/null << 'PY' + # Preprocess with Python. If preprocessing fails, fall back to the original file. 
+ if python3 - "$f" > "$tmp" 2>/dev/null <<'PY' import re, sys path = sys.argv[1] text = open(path, "r", encoding="utf-8", errors="replace").read() # Remove YAML frontmatter at top if text.startswith("---\n"): - m = re.match(r"^---\n.*?\n---\n", text, flags=re.S) - if m: - text = text[m.end():] + m = re.match(r"^---\n.*?\n---\n", text, flags=re.S) + if m: + text = text[m.end():] # Remove fenced code blocks text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M) @@ -87,18 +104,22 @@ text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M) # Remove inline code spans text = re.sub(r"`[^`]*`", " ", text) -# Neutralize common technical tokens (paths, filenames/exts, long identifiers) +# Neutralize path-ish tokens text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text) +# Neutralize common filename tokens with extensions exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)" text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I) +# Neutralize very long identifier-ish tokens text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text) text = re.sub(r"[ \t]+", " ", text) sys.stdout.write(text) PY then + : + else cp "$f" "$tmp" fi @@ -118,35 +139,43 @@ PY fi done - # Build PR comment body (upsert by marker) - MARKER="" - - if [ "$issues" -ne 0 ]; then + # If PR: upsert a single comment with a marker + if [ "$IS_PR" -eq 1 ]; then + MARKER="" BODY_FILE="$(mktemp)" - { - echo "$MARKER" - echo "### LanguageTool findings" - echo - echo "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._" - echo - echo '```' - cat "$REPORT_FILE" - echo '```' - } > "$BODY_FILE" - - # Find existing comment with marker (if any) and update it; otherwise create a new one + + if [ "$issues" -ne 0 ]; then + { + echo "$MARKER" + echo "### LanguageTool findings" + echo + echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._" + echo + echo '```' + cat "$REPORT_FILE" + echo '```' + } > "$BODY_FILE" + else + { + echo "$MARKER" + echo "### LanguageTool findings" + echo + echo "✅ No issues found in changed files." + } > "$BODY_FILE" + fi + COMMENTS_JSON="$(mktemp)" gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON" - COMMENT_ID="$(python3 - << 'PY' + COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY' import json, sys data = json.load(open(sys.argv[1], "r", encoding="utf-8")) for c in data: - if "" in (c.get("body") or ""): - print(c["id"]) - break + if "" in (c.get("body") or ""): + print(c["id"]) + break PY - "$COMMENTS_JSON")" + )" if [ -n "${COMMENT_ID:-}" ]; then gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null @@ -155,12 +184,8 @@ PY gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null echo "Posted new LanguageTool comment." fi - else - echo "No LanguageTool issues found." 
fi - rm -f "$REPORT_FILE" || true - if [ "$issues" -ne 0 ]; then exit 1 fi From 31143113b8a1701f265c075516a12200a19cc762 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:12:54 +0100 Subject: [PATCH 07/22] Update LanguageTool workflow for PR comments --- .github/workflows/languagetool-pr.yml | 129 ++++++++++++-------------- 1 file changed, 59 insertions(+), 70 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index b21a5321..92a5f038 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -1,10 +1,8 @@ -name: LanguageTool +name: LanguageTool (PR) on: pull_request: types: [opened, synchronize, reopened, ready_for_review] - push: - branches: ["**"] permissions: contents: read @@ -13,7 +11,6 @@ permissions: jobs: languagetool: runs-on: ubuntu-latest - steps: - name: Checkout uses: actions/checkout@v4 @@ -34,35 +31,20 @@ jobs: unzip -q LT.zip echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" - - name: Run LanguageTool (changed files) and comment on PR + - name: Run LanguageTool on changed PR files + comment summary env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} - EVENT_NAME: ${{ github.event_name }} PR_NUMBER: ${{ github.event.pull_request.number }} BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | set -euo pipefail - # Only comment when this run is for a PR event - IS_PR=0 - if [ "$EVENT_NAME" = "pull_request" ]; then - IS_PR=1 - fi - - if [ "$IS_PR" -eq 1 ]; then - echo "Base: $BASE_SHA" - echo "Head: $HEAD_SHA" - DIFF_BASE="$BASE_SHA" - DIFF_HEAD="$HEAD_SHA" - else - echo "Push run: comparing against previous commit" - DIFF_BASE="${GITHUB_SHA}^" - DIFF_HEAD="${GITHUB_SHA}" - fi + echo "Base: $BASE_SHA" + echo "Head: $HEAD_SHA" - mapfile -t FILES < <(git diff --name-only "$DIFF_BASE" "$DIFF_HEAD" \ + mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ | grep -E '\.(md|mdx|rst|txt)$' || true) if [ "${#FILES[@]}" -eq 0 ]; then @@ -76,19 +58,11 @@ jobs: JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)" LANG="en-US" - issues=0 - REPORT_FILE="$(mktemp)" - : > "$REPORT_FILE" - - for f in "${FILES[@]}"; do - echo "-----" - echo "Checking: $f" - - tmp="$(mktemp)" - - # Preprocess with Python. If preprocessing fails, fall back to the original file. 
- if python3 - "$f" > "$tmp" 2>/dev/null <<'PY' + # Write a preprocessor script to a file (avoids YAML/heredoc indentation issues) + PREPROCESS="$(mktemp)" + cat > "$PREPROCESS" <<'PY' import re, sys + path = sys.argv[1] text = open(path, "r", encoding="utf-8", errors="replace").read() @@ -117,7 +91,19 @@ text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text) text = re.sub(r"[ \t]+", " ", text) sys.stdout.write(text) PY - then + + issues=0 + REPORT_FILE="$(mktemp)" + : > "$REPORT_FILE" + + for f in "${FILES[@]}"; do + echo "-----" + echo "Checking: $f" + + tmp="$(mktemp)" + + # Preprocess; if it fails, fall back to original file (but keep the workflow running) + if python3 "$PREPROCESS" "$f" > "$tmp" 2>/dev/null; then : else cp "$f" "$tmp" @@ -139,35 +125,36 @@ PY fi done - # If PR: upsert a single comment with a marker - if [ "$IS_PR" -eq 1 ]; then - MARKER="" - BODY_FILE="$(mktemp)" + rm -f "$PREPROCESS" || true - if [ "$issues" -ne 0 ]; then - { - echo "$MARKER" - echo "### LanguageTool findings" - echo - echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._" - echo - echo '```' - cat "$REPORT_FILE" - echo '```' - } > "$BODY_FILE" - else - { - echo "$MARKER" - echo "### LanguageTool findings" - echo - echo "✅ No issues found in changed files." - } > "$BODY_FILE" - fi + # Upsert a single PR comment (marker-based) + MARKER="" + BODY_FILE="$(mktemp)" + + if [ "$issues" -ne 0 ]; then + { + echo "$MARKER" + echo "### LanguageTool findings" + echo + echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._" + echo + echo '```' + cat "$REPORT_FILE" + echo '```' + } > "$BODY_FILE" + else + { + echo "$MARKER" + echo "### LanguageTool findings" + echo + echo "✅ No issues found in changed files." + } > "$BODY_FILE" + fi - COMMENTS_JSON="$(mktemp)" - gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON" + COMMENTS_JSON="$(mktemp)" + gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100 exporting=false" > "$COMMENTS_JSON" - COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY' + COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY' import json, sys data = json.load(open(sys.argv[1], "r", encoding="utf-8")) for c in data: @@ -175,17 +162,19 @@ for c in data: print(c["id"]) break PY - )" + )" - if [ -n "${COMMENT_ID:-}" ]; then - gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null - echo "Updated existing LanguageTool comment." - else - gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null - echo "Posted new LanguageTool comment." - fi + if [ -n "${COMMENT_ID:-}" ]; then + gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null + echo "Updated existing LanguageTool comment." + else + gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null + echo "Posted new LanguageTool comment." 
fi + rm -f "$COMMENTS_JSON" "$BODY_FILE" "$REPORT_FILE" || true + + # Fail the check if there were findings (remove this block if you want advisory-only) if [ "$issues" -ne 0 ]; then exit 1 fi From 1ff1ed4034b4f735c75548429bcb540403ee39ae Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:22:37 +0100 Subject: [PATCH 08/22] Refactor LanguageTool workflow for PR review Updated the LanguageTool workflow to trigger on pull request target events and added rerun functionality via issue comments. --- .github/workflows/languagetool-pr.yml | 290 ++++++++++++-------------- 1 file changed, 133 insertions(+), 157 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index 92a5f038..9542ed02 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -1,180 +1,156 @@ -name: LanguageTool (PR) +name: LanguageTool (PR review) on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] + # Run once when the PR is opened/re-opened. + pull_request_target: + types: [opened, reopened, labeled] + + # Allow maintainers to rerun by commenting "/languagetool" + issue_comment: + types: [created] permissions: contents: read pull-requests: write + issues: write + +concurrency: + group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} + cancel-in-progress: true + +env: + LT_LANGUAGE: en-US + RERUN_LABEL: languagetool:run jobs: + # Comment command -> adds a label -> label event triggers the real run + rerun_on_comment: + if: | + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + contains(github.event.comment.body, '/languagetool') && + (github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'COLLABORATOR') + runs-on: ubuntu-latest + steps: + - name: Add rerun label to PR + uses: actions/github-script@v7 + with: + script: | + const label = process.env.RERUN_LABEL; + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = context.issue.number; // PR number for issue_comment + + // Ensure label exists (create if missing) + try { + await github.rest.issues.getLabel({ owner, repo, name: label }); + } catch (e) { + await github.rest.issues.createLabel({ + owner, + repo, + name: label, + color: '0e8a16', + description: 'Rerun LanguageTool on this PR' + }); + } + + await github.rest.issues.addLabels({ + owner, + repo, + issue_number, + labels: [label] + }); + languagetool: + if: | + github.event_name == 'pull_request_target' && + ( + github.event.action == 'opened' || + github.event.action == 'reopened' || + (github.event.action == 'labeled' && github.event.label.name == 'languagetool:run') + ) runs-on: ubuntu-latest + steps: - - name: Checkout + - name: Checkout PR (head SHA) uses: actions/checkout@v4 with: + ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - - name: Set up Java - uses: actions/setup-java@v4 - with: - distribution: temurin - java-version: "17" - - - name: Download LanguageTool + - name: Build LanguageTool server image with custom dictionary + shell: bash run: | set -euo pipefail - LT_VERSION="6.4" - curl -fsSL -o LT.zip "https://languagetool.org/download/LanguageTool-${LT_VERSION}.zip" - unzip -q LT.zip - echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV" - - - name: Run LanguageTool on changed PR files + 
comment summary - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO: ${{ github.repository }} - PR_NUMBER: ${{ github.event.pull_request.number }} - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} + + WORDS_DIR=".github/languagetool" + SPELLING_FILE="$WORDS_DIR/spelling.en.txt" + IGNORE_FILE="$WORDS_DIR/ignore.en.txt" + + mkdir -p "$WORDS_DIR" + test -f "$SPELLING_FILE" || : > "$SPELLING_FILE" + test -f "$IGNORE_FILE" || : > "$IGNORE_FILE" + + # Safety cap (avoid someone committing a gigantic word list) + head -n 2000 "$SPELLING_FILE" > /tmp/spelling_additions.txt + head -n 2000 "$IGNORE_FILE" > /tmp/ignore_additions.txt + + mkdir -p /tmp/lt + cp /tmp/spelling_additions.txt /tmp/lt/spelling_additions.txt + cp /tmp/ignore_additions.txt /tmp/lt/ignore_additions.txt + + cat > /tmp/lt/Dockerfile <<'EOF' + FROM erikvl87/languagetool:latest + USER root + COPY spelling_additions.txt /tmp/spelling_additions.txt + COPY ignore_additions.txt /tmp/ignore_additions.txt + RUN set -e; \ + if [ -s /tmp/spelling_additions.txt ]; then (echo; cat /tmp/spelling_additions.txt) >> org/languagetool/resource/en/hunspell/spelling.txt; fi; \ + if [ -s /tmp/ignore_additions.txt ]; then (echo; cat /tmp/ignore_additions.txt) >> org/languagetool/resource/en/hunspell/ignore.txt; fi + USER languagetool + EOF + + docker build -t lt-custom /tmp/lt + + - name: Start LanguageTool server + shell: bash run: | set -euo pipefail + docker run -d --name languagetool -p 8010:8010 lt-custom - echo "Base: $BASE_SHA" - echo "Head: $HEAD_SHA" - - mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|mdx|rst|txt)$' || true) - - if [ "${#FILES[@]}" -eq 0 ]; then - echo "No matching files changed. Skipping." 
- exit 0 - fi - - echo "Files to check:" - printf ' - %s\n' "${FILES[@]}" - - JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)" - LANG="en-US" - - # Write a preprocessor script to a file (avoids YAML/heredoc indentation issues) - PREPROCESS="$(mktemp)" - cat > "$PREPROCESS" <<'PY' -import re, sys - -path = sys.argv[1] -text = open(path, "r", encoding="utf-8", errors="replace").read() - -# Remove YAML frontmatter at top -if text.startswith("---\n"): - m = re.match(r"^---\n.*?\n---\n", text, flags=re.S) - if m: - text = text[m.end():] - -# Remove fenced code blocks -text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M) - -# Remove inline code spans -text = re.sub(r"`[^`]*`", " ", text) - -# Neutralize path-ish tokens -text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text) - -# Neutralize common filename tokens with extensions -exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)" -text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I) - -# Neutralize very long identifier-ish tokens -text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text) - -text = re.sub(r"[ \t]+", " ", text) -sys.stdout.write(text) -PY - - issues=0 - REPORT_FILE="$(mktemp)" - : > "$REPORT_FILE" - - for f in "${FILES[@]}"; do - echo "-----" - echo "Checking: $f" - - tmp="$(mktemp)" - - # Preprocess; if it fails, fall back to original file (but keep the workflow running) - if python3 "$PREPROCESS" "$f" > "$tmp" 2>/dev/null; then - : - else - cp "$f" "$tmp" - fi - - out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)" - rm -f "$tmp" - - if [ -n "$out" ]; then - issues=1 - echo "$out" - { - echo "FILE: $f" - echo "$out" - echo - } >> "$REPORT_FILE" - else - echo "OK" + # Wait until the API is up + for i in {1..60}; do + if curl -fsS http://127.0.0.1:8010/v2/languages >/dev/null; then + exit 0 fi + sleep 1 done - rm -f "$PREPROCESS" || true - - # Upsert a single PR comment (marker-based) - MARKER="" - BODY_FILE="$(mktemp)" - - if [ "$issues" -ne 0 ]; then - { - echo "$MARKER" - echo "### LanguageTool findings" - echo - echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._" - echo - echo '```' - cat "$REPORT_FILE" - echo '```' - } > "$BODY_FILE" - else - { - echo "$MARKER" - echo "### LanguageTool findings" - echo - echo "✅ No issues found in changed files." - } > "$BODY_FILE" - fi - - COMMENTS_JSON="$(mktemp)" - gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100 exporting=false" > "$COMMENTS_JSON" - - COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY' -import json, sys -data = json.load(open(sys.argv[1], "r", encoding="utf-8")) -for c in data: - if "" in (c.get("body") or ""): - print(c["id"]) - break -PY - )" - - if [ -n "${COMMENT_ID:-}" ]; then - gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null - echo "Updated existing LanguageTool comment." - else - gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null - echo "Posted new LanguageTool comment." 
- fi - - rm -f "$COMMENTS_JSON" "$BODY_FILE" "$REPORT_FILE" || true - - # Fail the check if there were findings (remove this block if you want advisory-only) - if [ "$issues" -ne 0 ]; then - exit 1 - fi + echo "LanguageTool server did not start in time" >&2 + docker logs languagetool || true + exit 1 + + - name: Run LanguageTool and comment suggestions on the PR + uses: reviewdog/action-languagetool@v1.23.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-pr-review + level: info + patterns: "**/*.md **/*.txt **/*.rst **/*.adoc" + language: ${{ env.LT_LANGUAGE }} + custom_api_endpoint: "http://127.0.0.1:8010" + + - name: Remove rerun label (so maintainers can trigger again) + if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:run' + continue-on-error: true + uses: actions/github-script@v7 + with: + script: | + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + name: 'languagetool:run', + }); From fd877f3af2e43d39751454185980815871cf9427 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:31:28 +0100 Subject: [PATCH 09/22] Update languagetool-pr.yml --- .github/workflows/languagetool-pr.yml | 224 ++++++++++++++------------ 1 file changed, 122 insertions(+), 102 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index 9542ed02..f67814bd 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -1,150 +1,166 @@ -name: LanguageTool (PR review) +name: LanguageTool (reviewdog) on: - # Run once when the PR is opened/re-opened. pull_request_target: types: [opened, reopened, labeled] - - # Allow maintainers to rerun by commenting "/languagetool" issue_comment: types: [created] permissions: contents: read pull-requests: write - issues: write concurrency: - group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} + group: languagetool-${{ github.event.pull_request.number || github.event.issue.number }} cancel-in-progress: true -env: - LT_LANGUAGE: en-US - RERUN_LABEL: languagetool:run - jobs: - # Comment command -> adds a label -> label event triggers the real run - rerun_on_comment: - if: | - github.event_name == 'issue_comment' && - github.event.issue.pull_request && - contains(github.event.comment.body, '/languagetool') && - (github.event.comment.author_association == 'MEMBER' || - github.event.comment.author_association == 'OWNER' || - github.event.comment.author_association == 'COLLABORATOR') + languagetool: runs-on: ubuntu-latest + steps: - - name: Add rerun label to PR + - name: Decide whether to run + gather PR info + id: meta uses: actions/github-script@v7 with: script: | - const label = process.env.RERUN_LABEL; - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = context.issue.number; // PR number for issue_comment - - // Ensure label exists (create if missing) - try { - await github.rest.issues.getLabel({ owner, repo, name: label }); - } catch (e) { - await github.rest.issues.createLabel({ - owner, - repo, - name: label, - color: '0e8a16', - description: 'Rerun LanguageTool on this PR' + const eventName = context.eventName; + + async function getPerm(username) { + const res = await github.rest.repos.getCollaboratorPermissionLevel({ + owner: context.repo.owner, + repo: context.repo.repo, + username, 
}); + return res.data.permission; // admin|maintain|write|triage|read|none } - await github.rest.issues.addLabels({ - owner, - repo, - issue_number, - labels: [label] - }); + let run = false; + let prNumber = null; + let pr = null; + + if (eventName === "pull_request_target") { + pr = context.payload.pull_request; + prNumber = pr.number; + + if (context.payload.action === "labeled") { + run = (context.payload.label?.name === "languagetool:rerun"); + } else { + // opened / reopened + run = true; + } + } else if (eventName === "issue_comment") { + // only run for PR comments + if (!context.payload.issue?.pull_request) { + run = false; + } else { + const body = (context.payload.comment?.body || "").trim(); + const wants = body.startsWith("/languagetool"); + if (!wants) { + run = false; + } else { + const perm = await getPerm(context.payload.comment.user.login); + run = ["admin", "maintain", "write"].includes(perm); + } + + prNumber = context.payload.issue.number; + const prRes = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + }); + pr = prRes.data; + } + } - languagetool: - if: | - github.event_name == 'pull_request_target' && - ( - github.event.action == 'opened' || - github.event.action == 'reopened' || - (github.event.action == 'labeled' && github.event.label.name == 'languagetool:run') - ) - runs-on: ubuntu-latest + core.setOutput("run", run ? "true" : "false"); + if (!pr) return; - steps: - - name: Checkout PR (head SHA) + core.setOutput("pr_number", String(prNumber)); + core.setOutput("head_sha", pr.head.sha); + core.setOutput("base_sha", pr.base.sha); + core.setOutput("head_repo", pr.head.repo.full_name); + core.setOutput("base_repo", pr.base.repo.full_name); + + - name: Stop early if not requested + if: steps.meta.outputs.run != 'true' + run: echo "Not running LanguageTool." 
+ + - name: Checkout PR head (safe) + if: steps.meta.outputs.run == 'true' uses: actions/checkout@v4 with: - ref: ${{ github.event.pull_request.head.sha }} + repository: ${{ steps.meta.outputs.head_repo }} + ref: ${{ steps.meta.outputs.head_sha }} fetch-depth: 0 + persist-credentials: false + submodules: false - - name: Build LanguageTool server image with custom dictionary - shell: bash + - name: Fetch base SHA for diffing + if: steps.meta.outputs.run == 'true' run: | set -euo pipefail + git remote add upstream "https://github.com/${{ steps.meta.outputs.base_repo }}.git" || true + git fetch --no-tags --depth=1 upstream "${{ steps.meta.outputs.base_sha }}" - WORDS_DIR=".github/languagetool" - SPELLING_FILE="$WORDS_DIR/spelling.en.txt" - IGNORE_FILE="$WORDS_DIR/ignore.en.txt" - - mkdir -p "$WORDS_DIR" - test -f "$SPELLING_FILE" || : > "$SPELLING_FILE" - test -f "$IGNORE_FILE" || : > "$IGNORE_FILE" - - # Safety cap (avoid someone committing a gigantic word list) - head -n 2000 "$SPELLING_FILE" > /tmp/spelling_additions.txt - head -n 2000 "$IGNORE_FILE" > /tmp/ignore_additions.txt - - mkdir -p /tmp/lt - cp /tmp/spelling_additions.txt /tmp/lt/spelling_additions.txt - cp /tmp/ignore_additions.txt /tmp/lt/ignore_additions.txt + - name: Setup Python + if: steps.meta.outputs.run == 'true' + uses: actions/setup-python@v5 + with: + python-version: "3.11" - cat > /tmp/lt/Dockerfile <<'EOF' - FROM erikvl87/languagetool:latest - USER root - COPY spelling_additions.txt /tmp/spelling_additions.txt - COPY ignore_additions.txt /tmp/ignore_additions.txt - RUN set -e; \ - if [ -s /tmp/spelling_additions.txt ]; then (echo; cat /tmp/spelling_additions.txt) >> org/languagetool/resource/en/hunspell/spelling.txt; fi; \ - if [ -s /tmp/ignore_additions.txt ]; then (echo; cat /tmp/ignore_additions.txt) >> org/languagetool/resource/en/hunspell/ignore.txt; fi - USER languagetool - EOF + - name: Install Python deps + if: steps.meta.outputs.run == 'true' + run: | + python -m pip install --upgrade pip + python -m pip install requests - docker build -t lt-custom /tmp/lt + - name: Setup reviewdog + if: steps.meta.outputs.run == 'true' + uses: reviewdog/action-setup@v1 + with: + reviewdog_version: latest - name: Start LanguageTool server - shell: bash + if: steps.meta.outputs.run == 'true' run: | set -euo pipefail - docker run -d --name languagetool -p 8010:8010 lt-custom + docker run -d --rm --name languagetool -p 8010:8010 erikvl87/languagetool:latest - # Wait until the API is up - for i in {1..60}; do - if curl -fsS http://127.0.0.1:8010/v2/languages >/dev/null; then + # Wait until ready + for i in $(seq 1 60); do + if curl -fsS "http://localhost:8010/v2/languages" >/dev/null; then + echo "LanguageTool is up." 
exit 0 fi - sleep 1 + sleep 2 done - echo "LanguageTool server did not start in time" >&2 + echo "LanguageTool did not become ready in time" >&2 docker logs languagetool || true exit 1 - - name: Run LanguageTool and comment suggestions on the PR - uses: reviewdog/action-languagetool@v1.23.0 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - reporter: github-pr-review - level: info - patterns: "**/*.md **/*.txt **/*.rst **/*.adoc" - language: ${{ env.LT_LANGUAGE }} - custom_api_endpoint: "http://127.0.0.1:8010" - - - name: Remove rerun label (so maintainers can trigger again) - if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:run' - continue-on-error: true + - name: Run LanguageTool and comment on PR + if: steps.meta.outputs.run == 'true' + env: + REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + python .github/scripts/languagetool_reviewdog.py \ + --api-url "http://localhost:8010/v2/check" \ + --language "en-US" \ + --base-sha "${{ steps.meta.outputs.base_sha }}" \ + --head-sha "${{ steps.meta.outputs.head_sha }}" \ + --dictionary ".languagetool/words.txt" \ + | reviewdog -f=rdjson \ + -name="LanguageTool" \ + -reporter="github-pr-review" \ + -filter-mode="file" \ + -fail-level="none" \ + -level="warning" + + - name: Remove rerun label (so it can be added again later) + if: steps.meta.outputs.run == 'true' && github.event_name == 'pull_request_target' && github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' uses: actions/github-script@v7 with: script: | @@ -152,5 +168,9 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.payload.pull_request.number, - name: 'languagetool:run', + name: "languagetool:rerun", }); + + - name: Stop LanguageTool + if: always() && steps.meta.outputs.run == 'true' + run: docker stop languagetool || true From 6cfe2b6daa9193924184264a447bf2ae65494aca Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:35:25 +0100 Subject: [PATCH 10/22] Add languagetool_reviewdog.py script --- .github/scripts/languagetool_reviewdog.py | 185 ++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 .github/scripts/languagetool_reviewdog.py diff --git a/.github/scripts/languagetool_reviewdog.py b/.github/scripts/languagetool_reviewdog.py new file mode 100644 index 00000000..050dc37e --- /dev/null +++ b/.github/scripts/languagetool_reviewdog.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +import argparse +import json +import os +import re +import subprocess +from typing import Dict, List, Optional, Set, Tuple + +import requests + + +def sh(*args: str) -> str: + return subprocess.check_output(args, text=True).strip() + + +def offset_to_line_col(text: str, offset: int) -> Tuple[int, int]: + # reviewdog wants 1-based line/column + line = text.count("\n", 0, offset) + 1 + last_nl = text.rfind("\n", 0, offset) + col = offset - (last_nl + 1) + 1 + return line, col + + +def normalize_word(s: str) -> str: + s = re.sub(r"^[\W_]+|[\W_]+$", "", s, flags=re.UNICODE) + return s.lower() + + +def load_dictionary(path: str) -> Set[str]: + if not path or not os.path.exists(path): + return set() + words: Set[str] = set() + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + words.add(line.lower()) + return words + + +def changed_files(base_sha: str, head_sha: str) -> List[str]: + # list only changed files in 
the PR + out = sh("git", "diff", "--name-only", base_sha, head_sha) + files = [x.strip() for x in out.splitlines() if x.strip()] + return files + + +def is_text_file(path: str) -> bool: + ext = os.path.splitext(path)[1].lower() + return ext in {".md", ".txt", ".rst", ".adoc", ".asciidoc", ".tex"} or os.path.basename(path).lower() in { + "readme", "readme.md", "readme.txt" + } + + +def lt_check(api_url: str, language: str, text: str) -> Dict: + resp = requests.post( + api_url, + data={ + "language": language, + "text": text, + }, + timeout=60, + ) + resp.raise_for_status() + return resp.json() + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--api-url", required=True) + ap.add_argument("--language", required=True) + ap.add_argument("--base-sha", required=True) + ap.add_argument("--head-sha", required=True) + ap.add_argument("--dictionary", default=".languagetool/words.txt") + ap.add_argument("--max-suggestions", type=int, default=3) + args = ap.parse_args() + + dict_words = load_dictionary(args.dictionary) + + files = changed_files(args.base_sha, args.head_sha) + files = [f for f in files if os.path.exists(f) and is_text_file(f)] + + diagnostics: List[Dict] = [] + + for path in files: + try: + with open(path, "r", encoding="utf-8") as f: + content = f.read() + except UnicodeDecodeError: + with open(path, "r", encoding="utf-8", errors="replace") as f: + content = f.read() + + if not content.strip(): + continue + + try: + result = lt_check(args.api_url, args.language, content) + except Exception as e: + # Emit a single diagnostic if the API call fails for a file + diagnostics.append( + { + "message": f"LanguageTool API error for {path}: {e}", + "location": {"path": path, "range": {"start": {"line": 1, "column": 1}}}, + "severity": "WARNING", + } + ) + continue + + matches = result.get("matches", []) + for m in matches: + offset = int(m.get("offset", 0)) + length = int(m.get("length", 0)) + bad = content[offset : offset + length] + + rule = m.get("rule", {}) or {} + rule_id = rule.get("id") or "UNKNOWN_RULE" + category = (rule.get("category", {}) or {}).get("id", "") + + # Cheap custom dictionary support without modifying LT server: + # if LT reports a spelling/typo-ish issue AND the token is in our dictionary -> ignore it. + # (Most spelling problems show up in category TYPOS and/or rule ids containing MORFOLOGIK.) 
+ bad_norm = normalize_word(bad) + if dict_words and bad_norm: + looks_like_spelling = (category.upper() == "TYPOS") or ("MORFOLOGIK" in str(rule_id).upper()) + if looks_like_spelling and (bad_norm in dict_words): + continue + + start_line, start_col = offset_to_line_col(content, offset) + end_line, end_col = offset_to_line_col(content, offset + max(length, 0)) + + # Suggestions (as rdjson "suggestions" with ranges) + suggestions = [] + repls = m.get("replacements", []) or [] + for r in repls[: args.max_suggestions]: + val = r.get("value") + if not val: + continue + suggestions.append( + { + "range": { + "start": {"line": start_line, "column": start_col}, + "end": {"line": end_line, "column": end_col}, + }, + "text": val, + } + ) + + code = {"value": rule_id} + urls = rule.get("urls") or [] + if urls and isinstance(urls, list): + u = urls[0].get("value") + if u: + code["url"] = u + + diagnostics.append( + { + "message": m.get("message") or "LanguageTool finding", + "location": { + "path": path, + "range": { + "start": {"line": start_line, "column": start_col}, + "end": {"line": end_line, "column": end_col}, + }, + }, + "severity": "WARNING", + "code": code, + **({"suggestions": suggestions} if suggestions else {}), + } + ) + + rdjson = { + "source": { + "name": "LanguageTool", + "url": "https://languagetool.org", + }, + "diagnostics": diagnostics, + } + + print(json.dumps(rdjson)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From ebfebd125741f10f36fa66b5384f58097d67b432 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:36:47 +0100 Subject: [PATCH 11/22] Update languagetool-pr.yml --- .github/workflows/languagetool-pr.yml | 290 +++++++++++++++++--------- 1 file changed, 194 insertions(+), 96 deletions(-) diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml index f67814bd..70ffbac2 100644 --- a/.github/workflows/languagetool-pr.yml +++ b/.github/workflows/languagetool-pr.yml @@ -1,4 +1,4 @@ -name: LanguageTool (reviewdog) +name: LanguageTool (PR review) on: pull_request_target: @@ -9,158 +9,256 @@ on: permissions: contents: read pull-requests: write + issues: write concurrency: - group: languagetool-${{ github.event.pull_request.number || github.event.issue.number }} + group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} cancel-in-progress: true +env: + LT_LANGUAGE: en-US + RERUN_LABEL: languagetool:rerun + LT_PORT: "8010" + jobs: - languagetool: + # 1) Comments do NOT run reviewdog. They only tag the PR. 
+ rerun_on_comment: + if: | + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + startsWith(github.event.comment.body, '/languagetool') && + (github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'COLLABORATOR') runs-on: ubuntu-latest - steps: - - name: Decide whether to run + gather PR info - id: meta + - name: Add rerun label to PR uses: actions/github-script@v7 with: script: | - const eventName = context.eventName; + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = context.issue.number; + const label = process.env.RERUN_LABEL; - async function getPerm(username) { - const res = await github.rest.repos.getCollaboratorPermissionLevel({ - owner: context.repo.owner, - repo: context.repo.repo, - username, + // Ensure the label exists + try { + await github.rest.issues.getLabel({ owner, repo, name: label }); + } catch (e) { + await github.rest.issues.createLabel({ + owner, repo, name: label, color: '0e8a16', + description: 'Rerun LanguageTool on this PR' }); - return res.data.permission; // admin|maintain|write|triage|read|none - } - - let run = false; - let prNumber = null; - let pr = null; - - if (eventName === "pull_request_target") { - pr = context.payload.pull_request; - prNumber = pr.number; - - if (context.payload.action === "labeled") { - run = (context.payload.label?.name === "languagetool:rerun"); - } else { - // opened / reopened - run = true; - } - } else if (eventName === "issue_comment") { - // only run for PR comments - if (!context.payload.issue?.pull_request) { - run = false; - } else { - const body = (context.payload.comment?.body || "").trim(); - const wants = body.startsWith("/languagetool"); - if (!wants) { - run = false; - } else { - const perm = await getPerm(context.payload.comment.user.login); - run = ["admin", "maintain", "write"].includes(perm); - } - - prNumber = context.payload.issue.number; - const prRes = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: prNumber, - }); - pr = prRes.data; - } } - core.setOutput("run", run ? "true" : "false"); - if (!pr) return; - - core.setOutput("pr_number", String(prNumber)); - core.setOutput("head_sha", pr.head.sha); - core.setOutput("base_sha", pr.base.sha); - core.setOutput("head_repo", pr.head.repo.full_name); - core.setOutput("base_repo", pr.base.repo.full_name); + await github.rest.issues.addLabels({ + owner, repo, issue_number, labels: [label] + }); - - name: Stop early if not requested - if: steps.meta.outputs.run != 'true' - run: echo "Not running LanguageTool." 
+ # 2) Actual PR run: opened/reopened OR labeled with rerun label + languagetool: + if: | + github.event_name == 'pull_request_target' && + ( + github.event.action == 'opened' || + github.event.action == 'reopened' || + (github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun') + ) + runs-on: ubuntu-latest + steps: - name: Checkout PR head (safe) - if: steps.meta.outputs.run == 'true' uses: actions/checkout@v4 with: - repository: ${{ steps.meta.outputs.head_repo }} - ref: ${{ steps.meta.outputs.head_sha }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 persist-credentials: false submodules: false - name: Fetch base SHA for diffing - if: steps.meta.outputs.run == 'true' run: | set -euo pipefail - git remote add upstream "https://github.com/${{ steps.meta.outputs.base_repo }}.git" || true - git fetch --no-tags --depth=1 upstream "${{ steps.meta.outputs.base_sha }}" + git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}.git" || true + git fetch --no-tags --depth=1 upstream "${{ github.event.pull_request.base.sha }}" - name: Setup Python - if: steps.meta.outputs.run == 'true' uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install Python deps - if: steps.meta.outputs.run == 'true' + - name: Install deps run: | python -m pip install --upgrade pip python -m pip install requests - name: Setup reviewdog - if: steps.meta.outputs.run == 'true' uses: reviewdog/action-setup@v1 with: reviewdog_version: latest - name: Start LanguageTool server - if: steps.meta.outputs.run == 'true' run: | set -euo pipefail - docker run -d --rm --name languagetool -p 8010:8010 erikvl87/languagetool:latest + docker run -d --rm --name languagetool -p "${LT_PORT}:8010" erikvl87/languagetool:latest - # Wait until ready + # Wait until ready (avoid connection reset during warmup) for i in $(seq 1 60); do - if curl -fsS "http://localhost:8010/v2/languages" >/dev/null; then + if curl -fsS "http://localhost:${LT_PORT}/v2/languages" >/dev/null; then echo "LanguageTool is up." 
exit 0 fi sleep 2 done - echo "LanguageTool did not become ready in time" >&2 + echo "LanguageTool did not become ready" >&2 docker logs languagetool || true exit 1 - - name: Run LanguageTool and comment on PR - if: steps.meta.outputs.run == 'true' + - name: Run LanguageTool -> reviewdog PR review comments env: REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | set -euo pipefail - python .github/scripts/languagetool_reviewdog.py \ - --api-url "http://localhost:8010/v2/check" \ - --language "en-US" \ - --base-sha "${{ steps.meta.outputs.base_sha }}" \ - --head-sha "${{ steps.meta.outputs.head_sha }}" \ - --dictionary ".languagetool/words.txt" \ - | reviewdog -f=rdjson \ - -name="LanguageTool" \ - -reporter="github-pr-review" \ - -filter-mode="file" \ - -fail-level="none" \ - -level="warning" - - - name: Remove rerun label (so it can be added again later) - if: steps.meta.outputs.run == 'true' && github.event_name == 'pull_request_target' && github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' + + # Inline python: produce rdjson for reviewdog (no repo script file needed) + python - <<'PY' > /tmp/rd.json + import json, os, re, subprocess + import requests + + API_URL = f"http://localhost:{os.environ['LT_PORT']}/v2/check" + LANGUAGE = os.environ.get("LT_LANGUAGE", "en-US") + BASE_SHA = os.environ["BASE_SHA"] + HEAD_SHA = os.environ["HEAD_SHA"] + DICT_PATH = ".languagetool/words.txt" + MAX_SUG = 3 + MAX_TEXT = 300_000 # avoid huge posts + + def sh(*args): + return subprocess.check_output(args, text=True).strip() + + def normalize_word(s: str) -> str: + s = re.sub(r"^[\W_]+|[\W_]+$", "", s, flags=re.UNICODE) + return s.lower() + + def load_dict(path): + if not os.path.exists(path): + return set() + out = set() + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + out.add(line.lower()) + return out + + def offset_to_line_col(text, offset): + line = text.count("\n", 0, offset) + 1 + last_nl = text.rfind("\n", 0, offset) + col = offset - (last_nl + 1) + 1 + return line, col + + def changed_files(base, head): + out = sh("git", "diff", "--name-only", base, head) + return [x.strip() for x in out.splitlines() if x.strip()] + + def is_text_file(path): + ext = os.path.splitext(path)[1].lower() + return ext in {".md",".txt",".rst",".adoc",".asciidoc",".tex"} + + dict_words = load_dict(DICT_PATH) + files = [f for f in changed_files(BASE_SHA, HEAD_SHA) if os.path.exists(f) and is_text_file(f)] + diagnostics = [] + + for path in files: + try: + content = open(path, "r", encoding="utf-8").read() + except UnicodeDecodeError: + content = open(path, "r", encoding="utf-8", errors="replace").read() + + if not content.strip(): + continue + + if len(content) > MAX_TEXT: + content = content[:MAX_TEXT] + + try: + r = requests.post(API_URL, data={"language": LANGUAGE, "text": content}, timeout=60) + r.raise_for_status() + data = r.json() + except Exception as e: + diagnostics.append({ + "message": f"LanguageTool API error for {path}: {e}", + "location": {"path": path, "range": {"start": {"line": 1, "column": 1}}}, + "severity": "WARNING", + }) + continue + + for m in data.get("matches", []): + offset = int(m.get("offset", 0)) + length = int(m.get("length", 0)) + bad = content[offset:offset+length] + rule = m.get("rule", {}) or {} + rule_id = rule.get("id") or "UNKNOWN_RULE" + category = 
(rule.get("category", {}) or {}).get("id", "") + + # Custom dictionary: ignore spelling-ish matches when token is in words.txt + bad_norm = normalize_word(bad) + if dict_words and bad_norm: + looks_like_spelling = (category.upper() == "TYPOS") or ("MORFOLOGIK" in str(rule_id).upper()) + if looks_like_spelling and (bad_norm in dict_words): + continue + + sl, sc = offset_to_line_col(content, offset) + el, ec = offset_to_line_col(content, offset + max(length, 0)) + + suggestions = [] + for repl in (m.get("replacements") or [])[:MAX_SUG]: + v = repl.get("value") + if not v: + continue + suggestions.append({ + "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, + "text": v, + }) + + code = {"value": rule_id} + urls = rule.get("urls") or [] + if urls and isinstance(urls, list): + u = urls[0].get("value") + if u: + code["url"] = u + + diagnostics.append({ + "message": m.get("message") or "LanguageTool finding", + "location": { + "path": path, + "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, + }, + "severity": "WARNING", + "code": code, + **({"suggestions": suggestions} if suggestions else {}), + }) + + print(json.dumps({ + "source": {"name": "LanguageTool", "url": "https://languagetool.org"}, + "diagnostics": diagnostics + })) + PY + + reviewdog -f=rdjson \ + -name="LanguageTool" \ + -reporter="github-pr-review" \ + -filter-mode="diff_context" \ + -fail-level="none" \ + -level="warning" < /tmp/rd.json + + - name: Remove rerun label + if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' + continue-on-error: true uses: actions/github-script@v7 with: script: | @@ -168,9 +266,9 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: context.payload.pull_request.number, - name: "languagetool:rerun", + name: process.env.RERUN_LABEL, }); - name: Stop LanguageTool - if: always() && steps.meta.outputs.run == 'true' + if: always() run: docker stop languagetool || true From d3d797f6a8bf2356649a29c3dfb51a7ef01f665d Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:50:06 +0100 Subject: [PATCH 12/22] Modify LanguageTool workflow for PR comments and Java setup Updated LanguageTool GitHub Actions workflow to allow synchronization events and changed the PR comment behavior. Replaced Python setup with Java setup for LanguageTool execution. --- .github/workflows/languagetool-pr.yml | 274 -------------------------- .github/workflows/languagetool.yml | 265 +++++++++++++++++++++++++ 2 files changed, 265 insertions(+), 274 deletions(-) delete mode 100644 .github/workflows/languagetool-pr.yml create mode 100644 .github/workflows/languagetool.yml diff --git a/.github/workflows/languagetool-pr.yml b/.github/workflows/languagetool-pr.yml deleted file mode 100644 index 70ffbac2..00000000 --- a/.github/workflows/languagetool-pr.yml +++ /dev/null @@ -1,274 +0,0 @@ -name: LanguageTool (PR review) - -on: - pull_request_target: - types: [opened, reopened, labeled] - issue_comment: - types: [created] - -permissions: - contents: read - pull-requests: write - issues: write - -concurrency: - group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} - cancel-in-progress: true - -env: - LT_LANGUAGE: en-US - RERUN_LABEL: languagetool:rerun - LT_PORT: "8010" - -jobs: - # 1) Comments do NOT run reviewdog. They only tag the PR. 
- rerun_on_comment: - if: | - github.event_name == 'issue_comment' && - github.event.issue.pull_request && - startsWith(github.event.comment.body, '/languagetool') && - (github.event.comment.author_association == 'MEMBER' || - github.event.comment.author_association == 'OWNER' || - github.event.comment.author_association == 'COLLABORATOR') - runs-on: ubuntu-latest - steps: - - name: Add rerun label to PR - uses: actions/github-script@v7 - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = context.issue.number; - const label = process.env.RERUN_LABEL; - - // Ensure the label exists - try { - await github.rest.issues.getLabel({ owner, repo, name: label }); - } catch (e) { - await github.rest.issues.createLabel({ - owner, repo, name: label, color: '0e8a16', - description: 'Rerun LanguageTool on this PR' - }); - } - - await github.rest.issues.addLabels({ - owner, repo, issue_number, labels: [label] - }); - - # 2) Actual PR run: opened/reopened OR labeled with rerun label - languagetool: - if: | - github.event_name == 'pull_request_target' && - ( - github.event.action == 'opened' || - github.event.action == 'reopened' || - (github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun') - ) - runs-on: ubuntu-latest - - steps: - - name: Checkout PR head (safe) - uses: actions/checkout@v4 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 0 - persist-credentials: false - submodules: false - - - name: Fetch base SHA for diffing - run: | - set -euo pipefail - git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}.git" || true - git fetch --no-tags --depth=1 upstream "${{ github.event.pull_request.base.sha }}" - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Install deps - run: | - python -m pip install --upgrade pip - python -m pip install requests - - - name: Setup reviewdog - uses: reviewdog/action-setup@v1 - with: - reviewdog_version: latest - - - name: Start LanguageTool server - run: | - set -euo pipefail - docker run -d --rm --name languagetool -p "${LT_PORT}:8010" erikvl87/languagetool:latest - - # Wait until ready (avoid connection reset during warmup) - for i in $(seq 1 60); do - if curl -fsS "http://localhost:${LT_PORT}/v2/languages" >/dev/null; then - echo "LanguageTool is up." 
- exit 0 - fi - sleep 2 - done - - echo "LanguageTool did not become ready" >&2 - docker logs languagetool || true - exit 1 - - - name: Run LanguageTool -> reviewdog PR review comments - env: - REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: | - set -euo pipefail - - # Inline python: produce rdjson for reviewdog (no repo script file needed) - python - <<'PY' > /tmp/rd.json - import json, os, re, subprocess - import requests - - API_URL = f"http://localhost:{os.environ['LT_PORT']}/v2/check" - LANGUAGE = os.environ.get("LT_LANGUAGE", "en-US") - BASE_SHA = os.environ["BASE_SHA"] - HEAD_SHA = os.environ["HEAD_SHA"] - DICT_PATH = ".languagetool/words.txt" - MAX_SUG = 3 - MAX_TEXT = 300_000 # avoid huge posts - - def sh(*args): - return subprocess.check_output(args, text=True).strip() - - def normalize_word(s: str) -> str: - s = re.sub(r"^[\W_]+|[\W_]+$", "", s, flags=re.UNICODE) - return s.lower() - - def load_dict(path): - if not os.path.exists(path): - return set() - out = set() - with open(path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - out.add(line.lower()) - return out - - def offset_to_line_col(text, offset): - line = text.count("\n", 0, offset) + 1 - last_nl = text.rfind("\n", 0, offset) - col = offset - (last_nl + 1) + 1 - return line, col - - def changed_files(base, head): - out = sh("git", "diff", "--name-only", base, head) - return [x.strip() for x in out.splitlines() if x.strip()] - - def is_text_file(path): - ext = os.path.splitext(path)[1].lower() - return ext in {".md",".txt",".rst",".adoc",".asciidoc",".tex"} - - dict_words = load_dict(DICT_PATH) - files = [f for f in changed_files(BASE_SHA, HEAD_SHA) if os.path.exists(f) and is_text_file(f)] - diagnostics = [] - - for path in files: - try: - content = open(path, "r", encoding="utf-8").read() - except UnicodeDecodeError: - content = open(path, "r", encoding="utf-8", errors="replace").read() - - if not content.strip(): - continue - - if len(content) > MAX_TEXT: - content = content[:MAX_TEXT] - - try: - r = requests.post(API_URL, data={"language": LANGUAGE, "text": content}, timeout=60) - r.raise_for_status() - data = r.json() - except Exception as e: - diagnostics.append({ - "message": f"LanguageTool API error for {path}: {e}", - "location": {"path": path, "range": {"start": {"line": 1, "column": 1}}}, - "severity": "WARNING", - }) - continue - - for m in data.get("matches", []): - offset = int(m.get("offset", 0)) - length = int(m.get("length", 0)) - bad = content[offset:offset+length] - rule = m.get("rule", {}) or {} - rule_id = rule.get("id") or "UNKNOWN_RULE" - category = (rule.get("category", {}) or {}).get("id", "") - - # Custom dictionary: ignore spelling-ish matches when token is in words.txt - bad_norm = normalize_word(bad) - if dict_words and bad_norm: - looks_like_spelling = (category.upper() == "TYPOS") or ("MORFOLOGIK" in str(rule_id).upper()) - if looks_like_spelling and (bad_norm in dict_words): - continue - - sl, sc = offset_to_line_col(content, offset) - el, ec = offset_to_line_col(content, offset + max(length, 0)) - - suggestions = [] - for repl in (m.get("replacements") or [])[:MAX_SUG]: - v = repl.get("value") - if not v: - continue - suggestions.append({ - "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, - "text": v, - }) - - code = {"value": rule_id} - urls = rule.get("urls") or 
[] - if urls and isinstance(urls, list): - u = urls[0].get("value") - if u: - code["url"] = u - - diagnostics.append({ - "message": m.get("message") or "LanguageTool finding", - "location": { - "path": path, - "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, - }, - "severity": "WARNING", - "code": code, - **({"suggestions": suggestions} if suggestions else {}), - }) - - print(json.dumps({ - "source": {"name": "LanguageTool", "url": "https://languagetool.org"}, - "diagnostics": diagnostics - })) - PY - - reviewdog -f=rdjson \ - -name="LanguageTool" \ - -reporter="github-pr-review" \ - -filter-mode="diff_context" \ - -fail-level="none" \ - -level="warning" < /tmp/rd.json - - - name: Remove rerun label - if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' - continue-on-error: true - uses: actions/github-script@v7 - with: - script: | - await github.rest.issues.removeLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.payload.pull_request.number, - name: process.env.RERUN_LABEL, - }); - - - name: Stop LanguageTool - if: always() - run: docker stop languagetool || true diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml new file mode 100644 index 00000000..fcc3aaf6 --- /dev/null +++ b/.github/workflows/languagetool.yml @@ -0,0 +1,265 @@ +name: LanguageTool (PR comment) + +on: + pull_request_target: + types: [opened, reopened, synchronize, labeled] + issue_comment: + types: [created] + +permissions: + contents: read + pull-requests: write + issues: write + +concurrency: + group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} + cancel-in-progress: true + +env: + LT_LANGUAGE: en-US + RERUN_LABEL: languagetool:rerun + +jobs: + # Comment command -> toggle a label to trigger the PR job + rerun_on_comment: + if: | + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + startsWith(github.event.comment.body, '/languagetool') && + (github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'COLLABORATOR') + runs-on: ubuntu-latest + steps: + - name: Toggle rerun label on PR + uses: actions/github-script@v7 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = context.issue.number; + const label = process.env.RERUN_LABEL; + + // ensure label exists + try { + await github.rest.issues.getLabel({ owner, repo, name: label }); + } catch { + await github.rest.issues.createLabel({ + owner, repo, name: label, color: '0e8a16', + description: 'Rerun LanguageTool on this PR' + }); + } + + // remove if present (ignore if missing), then add to force a "labeled" event + try { await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label }); } catch {} + await github.rest.issues.addLabels({ owner, repo, issue_number, labels: [label] }); + + languagetool: + if: | + github.event_name == 'pull_request_target' && + ( + github.event.action == 'opened' || + github.event.action == 'reopened' || + github.event.action == 'synchronize' || + (github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun') + ) + runs-on: ubuntu-latest + + steps: + - name: Checkout PR head (safe) + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + 
fetch-depth: 0 + persist-credentials: false + + - name: Fetch base SHA for diff + run: | + set -euo pipefail + git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}.git" || true + git fetch --no-tags --depth=1 upstream "${{ github.event.pull_request.base.sha }}" + + - name: Setup Java 17 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "17" + + - name: Download LanguageTool snapshot (CLI) + run: | + set -euo pipefail + curl -fsSL -o lt.zip "https://internal1.languagetool.org/snapshots/LanguageTool-latest-snapshot.zip" + rm -rf .lt + mkdir -p .lt + unzip -q lt.zip -d .lt + # locate the command line jar + LT_JAR="$(ls -1 .lt/**/languagetool-commandline.jar 2>/dev/null | head -n1 || true)" + if [ -z "${LT_JAR}" ]; then + echo "Could not find languagetool-commandline.jar in snapshot" >&2 + find .lt -maxdepth 3 -type f -name "*languagetool*jar" -print >&2 || true + exit 1 + fi + echo "LT_JAR=${LT_JAR}" >> "$GITHUB_ENV" + + - name: Run LanguageTool on changed text files and build PR comment + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + + # Choose which files to check (edit this regex to include more types) + mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ + | grep -E '\.(md|txt|rst|adoc|asciidoc|tex)$' || true) + + # Load custom words (optional) + WORDS_FILE=".languagetool/words.txt" + if [ -f "$WORDS_FILE" ]; then + WORDS_JSON="$(jq -R -s ' + split("\n") + | map(gsub("\r";"")) + | map(select(length>0 and (startswith("#")|not))) + | map(ascii_downcase) + ' "$WORDS_FILE")" + else + WORDS_JSON='[]' + fi + + : > results.jsonl + + if [ "${#FILES[@]}" -eq 0 ]; then + echo '{"file":"(none)","issues":[]}' >> results.jsonl + else + for f in "${FILES[@]}"; do + [ -f "$f" ] || continue + + # LT prints banner lines before JSON sometimes; keep JSON only + java -jar "$LT_JAR" -l "${LT_LANGUAGE}" --json "$f" 2>/dev/null \ + | sed -n '/^{/,$p' > lt.json || true + + # Extract issues and filter spelling-ish matches for custom words + jq -c \ + --arg file "$f" \ + --argjson words "$WORDS_JSON" ' + def badtoken: + (.context.text + | .[.context.offset:(.context.offset + .context.length)] + | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") + | ascii_downcase); + + (.matches // []) + | map( + . 
as $m + | ($m.rule.id // "") as $rid + | ($m.rule.category.id // "") as $cat + | (badtoken) as $bt + | select( + # drop spelling-ish warnings when the token is in our custom list + ( (( $cat == "TYPOS") or ($rid|test("MORFOLOGIK";"i")) ) + and (($words|index($bt)) != null) + ) | not + ) + | { + message: ($m.message // "LanguageTool finding"), + rule: $rid, + replacements: (($m.replacements // []) | map(.value) | .[0:3]), + context: ($m.context.text // ""), + context_offset: ($m.context.offset // 0), + context_length: ($m.context.length // 0) + } + ) + | {file:$file, issues:.} + ' lt.json >> results.jsonl + done + fi + + # Build markdown body (stored as a file) + node <<'NODE' + const fs = require("fs"); + + const marker = ""; + const lines = fs.readFileSync("results.jsonl","utf8").trim().split("\n").filter(Boolean); + const parsed = lines.map(l => JSON.parse(l)); + + const checkedFiles = parsed + .map(p => p.file) + .filter(f => f && f !== "(none)"); + const byFile = parsed + .filter(p => Array.isArray(p.issues) && p.issues.length > 0) + .reduce((acc, p) => { acc[p.file] = p.issues; return acc; }, {}); + + let total = 0; + for (const f of Object.keys(byFile)) total += byFile[f].length; + + let body = `${marker} + ## LanguageTool report + + **Language:** \`${process.env.LT_LANGUAGE || "en-US"}\` + **Checked files:** ${checkedFiles.length ? checkedFiles.length : 0} + **Findings:** ${total} + `; + + if (!checkedFiles.length) { + body += `\nNo supported text files changed in this PR (based on the file extensions configured).\n`; + } else if (total === 0) { + body += `\n✅ No issues found in the changed text files.\n`; + } else { + body += `\n---\n`; + for (const [file, issues] of Object.entries(byFile)) { + body += `\n### ${file}\n`; + for (const it of issues.slice(0, 200)) { // cap to avoid huge comments + const ctx = (it.context || "").replace(/\s+/g, " ").trim(); + const snippet = ctx ? `\n> ${ctx}\n` : ""; + const sug = (it.replacements && it.replacements.length) + ? 
`\nSuggested: ${it.replacements.map(s => `\`${s}\``).join(", ")}\n` + : ""; + body += `\n- **${it.rule || "RULE"}**: ${it.message}${sug}${snippet}`; + } + if (issues.length > 200) body += `\n…(${issues.length - 200} more in this file)\n`; + } + } + + fs.writeFileSync("comment.md", body.trim() + "\n"); + NODE + + - name: Post or update PR comment + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const owner = context.repo.owner; + const repo = context.repo.repo; + const issue_number = context.payload.pull_request.number; + + const body = fs.readFileSync('comment.md', 'utf8'); + const marker = ''; + + const { data: comments } = await github.rest.issues.listComments({ + owner, repo, issue_number, per_page: 100 + }); + + const existing = comments.find(c => (c.body || '').includes(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner, repo, comment_id: existing.id, body + }); + } else { + await github.rest.issues.createComment({ + owner, repo, issue_number, body + }); + } + + - name: Remove rerun label + if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' + continue-on-error: true + uses: actions/github-script@v7 + with: + script: | + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + name: process.env.RERUN_LABEL, + }); From c455ba396a10002972d4bef739161dfdeb8b33d9 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:53:14 +0100 Subject: [PATCH 13/22] Update languagetool.yml --- .github/workflows/languagetool.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index fcc3aaf6..c567451c 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -111,7 +111,7 @@ jobs: # Choose which files to check (edit this regex to include more types) mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|txt|rst|adoc|asciidoc|tex)$' || true) + | grep -E '\.(md|txt|rst|adoc|asciidoc|tex|mdx)$' || true) # Load custom words (optional) WORDS_FILE=".languagetool/words.txt" From b51c3d2425bbfde6f896a593913cd4cd6ebf7d40 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 19:58:46 +0100 Subject: [PATCH 14/22] Update languagetool.yml --- .github/workflows/languagetool.yml | 55 +++++++++++++++++------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index c567451c..83bcaa42 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -1,3 +1,4 @@ +# .github/workflows/languagetool.yml name: LanguageTool (PR comment) on: @@ -20,7 +21,7 @@ env: RERUN_LABEL: languagetool:rerun jobs: - # Comment command -> toggle a label to trigger the PR job + # Comment command -> toggles a label to trigger the PR job rerun_on_comment: if: | github.event_name == 'issue_comment' && @@ -40,7 +41,7 @@ jobs: const issue_number = context.issue.number; const label = process.env.RERUN_LABEL; - // ensure label exists + // Ensure label exists try { await github.rest.issues.getLabel({ owner, repo, name: label }); } catch { @@ -50,8 +51,10 @@ jobs: }); } - // remove if present (ignore if missing), then add to force a "labeled" event - try { await 
github.rest.issues.removeLabel({ owner, repo, issue_number, name: label }); } catch {} + // Remove if present (ignore errors), then add to force a new "labeled" event + try { + await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label }); + } catch {} await github.rest.issues.addLabels({ owner, repo, issue_number, labels: [label] }); languagetool: @@ -86,32 +89,36 @@ jobs: distribution: temurin java-version: "17" - - name: Download LanguageTool snapshot (CLI) + - name: Download LanguageTool CLI (latest snapshot) run: | set -euo pipefail curl -fsSL -o lt.zip "https://internal1.languagetool.org/snapshots/LanguageTool-latest-snapshot.zip" rm -rf .lt mkdir -p .lt unzip -q lt.zip -d .lt - # locate the command line jar + LT_JAR="$(ls -1 .lt/**/languagetool-commandline.jar 2>/dev/null | head -n1 || true)" if [ -z "${LT_JAR}" ]; then echo "Could not find languagetool-commandline.jar in snapshot" >&2 - find .lt -maxdepth 3 -type f -name "*languagetool*jar" -print >&2 || true + find .lt -maxdepth 4 -type f -name "*languagetool*jar" -print >&2 || true exit 1 fi + echo "LT_JAR=${LT_JAR}" >> "$GITHUB_ENV" - - name: Run LanguageTool on changed text files and build PR comment + - name: Run LanguageTool + build PR comment env: BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | set -euo pipefail - # Choose which files to check (edit this regex to include more types) + # jq is present on ubuntu-latest, but install if your runner image differs + command -v jq >/dev/null || (sudo apt-get update && sudo apt-get install -y jq) + + # Choose files to check mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|txt|rst|adoc|asciidoc|tex|mdx)$' || true) + | grep -E '\.(md|mdx|txt|rst|adoc|asciidoc|tex)$' || true) # Load custom words (optional) WORDS_FILE=".languagetool/words.txt" @@ -134,19 +141,21 @@ jobs: for f in "${FILES[@]}"; do [ -f "$f" ] || continue - # LT prints banner lines before JSON sometimes; keep JSON only + # LT can print banner lines; keep JSON only (accepts either { or [) java -jar "$LT_JAR" -l "${LT_LANGUAGE}" --json "$f" 2>/dev/null \ - | sed -n '/^{/,$p' > lt.json || true + | sed -n '/^[{[]/,$p' > lt.json || true # Extract issues and filter spelling-ish matches for custom words jq -c \ --arg file "$f" \ --argjson words "$WORDS_JSON" ' def badtoken: - (.context.text - | .[.context.offset:(.context.offset + .context.length)] - | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") - | ascii_downcase); + (.context.offset // 0) as $o + | (.context.length // 0) as $l + | (.context.text // "") as $t + | ($t[$o:($o+$l)] + | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") + | ascii_downcase); (.matches // []) | map( @@ -155,7 +164,6 @@ jobs: | ($m.rule.category.id // "") as $cat | (badtoken) as $bt | select( - # drop spelling-ish warnings when the token is in our custom list ( (( $cat == "TYPOS") or ($rid|test("MORFOLOGIK";"i")) ) and (($words|index($bt)) != null) ) | not @@ -179,12 +187,11 @@ jobs: const fs = require("fs"); const marker = ""; - const lines = fs.readFileSync("results.jsonl","utf8").trim().split("\n").filter(Boolean); + const raw = fs.readFileSync("results.jsonl","utf8").trim(); + const lines = raw ? 
raw.split("\n").filter(Boolean) : []; const parsed = lines.map(l => JSON.parse(l)); - const checkedFiles = parsed - .map(p => p.file) - .filter(f => f && f !== "(none)"); + const checkedFiles = parsed.map(p => p.file).filter(f => f && f !== "(none)"); const byFile = parsed .filter(p => Array.isArray(p.issues) && p.issues.length > 0) .reduce((acc, p) => { acc[p.file] = p.issues; return acc; }, {}); @@ -196,19 +203,19 @@ jobs: ## LanguageTool report **Language:** \`${process.env.LT_LANGUAGE || "en-US"}\` - **Checked files:** ${checkedFiles.length ? checkedFiles.length : 0} + **Checked files:** ${checkedFiles.length} **Findings:** ${total} `; if (!checkedFiles.length) { - body += `\nNo supported text files changed in this PR (based on the file extensions configured).\n`; + body += `\nNo supported text files changed in this PR (based on configured extensions).\n`; } else if (total === 0) { body += `\n✅ No issues found in the changed text files.\n`; } else { body += `\n---\n`; for (const [file, issues] of Object.entries(byFile)) { body += `\n### ${file}\n`; - for (const it of issues.slice(0, 200)) { // cap to avoid huge comments + for (const it of issues.slice(0, 200)) { const ctx = (it.context || "").replace(/\s+/g, " ").trim(); const snippet = ctx ? `\n> ${ctx}\n` : ""; const sug = (it.replacements && it.replacements.length) From d6ef83d5ac8474ed297f7e588dbf5b6f85f6d588 Mon Sep 17 00:00:00 2001 From: CookieSource <36531905+CookieSource@users.noreply.github.com> Date: Tue, 13 Jan 2026 20:06:05 +0100 Subject: [PATCH 15/22] PR after a working version Updated LanguageTool workflow to enhance PR comment formatting and issue reporting. --- .github/workflows/languagetool.yml | 49 ++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index 83bcaa42..5a842a30 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -106,7 +106,7 @@ jobs: echo "LT_JAR=${LT_JAR}" >> "$GITHUB_ENV" - - name: Run LanguageTool + build PR comment + - name: Run LanguageTool + build PR comment (collapsible + exact word) env: BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} @@ -149,13 +149,16 @@ jobs: jq -c \ --arg file "$f" \ --argjson words "$WORDS_JSON" ' - def badtoken: + def bad_raw: (.context.offset // 0) as $o | (.context.length // 0) as $l | (.context.text // "") as $t - | ($t[$o:($o+$l)] - | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") - | ascii_downcase); + | ($t[$o:($o+$l)]); + + def badtoken: + (bad_raw + | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") + | ascii_downcase); (.matches // []) | map( @@ -171,6 +174,7 @@ jobs: | { message: ($m.message // "LanguageTool finding"), rule: $rid, + bad: (bad_raw), replacements: (($m.replacements // []) | map(.value) | .[0:3]), context: ($m.context.text // ""), context_offset: ($m.context.offset // 0), @@ -182,7 +186,7 @@ jobs: done fi - # Build markdown body (stored as a file) + # Build markdown body (stored as a file) - collapsible per file node <<'NODE' const fs = require("fs"); @@ -199,7 +203,19 @@ jobs: let total = 0; for (const f of Object.keys(byFile)) total += byFile[f].length; - let body = `${marker} + function inlineCode(s) { + if (s == null) return ""; + return String(s).replace(/`/g, "\\`").replace(/\n/g, " ").trim(); + } + + function shortContext(text, maxLen=220) { + const t = (text || "").replace(/\s+/g, " ").trim(); + if (t.length <= maxLen) return t; + return t.slice(0, 
maxLen - 1) + "…"; + } + + let body = + `${marker} ## LanguageTool report **Language:** \`${process.env.LT_LANGUAGE || "en-US"}\` @@ -214,16 +230,23 @@ jobs: } else { body += `\n---\n`; for (const [file, issues] of Object.entries(byFile)) { - body += `\n### ${file}\n`; + body += `\n
\n${file} — ${issues.length} finding(s)\n\n`; for (const it of issues.slice(0, 200)) { - const ctx = (it.context || "").replace(/\s+/g, " ").trim(); - const snippet = ctx ? `\n> ${ctx}\n` : ""; + const found = inlineCode(it.bad); + const ctx = shortContext(it.context); const sug = (it.replacements && it.replacements.length) - ? `\nSuggested: ${it.replacements.map(s => `\`${s}\``).join(", ")}\n` + ? `Suggested: ${it.replacements.map(s => `\`${inlineCode(s)}\``).join(", ")}\n` : ""; - body += `\n- **${it.rule || "RULE"}**: ${it.message}${sug}${snippet}`; + body += + `- **${inlineCode(it.rule || "RULE")}**: ${inlineCode(it.message)} + - Found: \`${found}\` + - ${sug ? sug.trimEnd() : "Suggested: (none)"} + - Context: ${ctx ? `> ${ctx}` : "(none)"} + + `; } - if (issues.length > 200) body += `\n…(${issues.length - 200} more in this file)\n`; + if (issues.length > 200) body += `…(${issues.length - 200} more in this file)\n\n`; + body += `
\n`; } } From 1f7a335973b6cdb3194cde890cc8128006c21218 Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Thu, 15 Jan 2026 18:56:44 +0100 Subject: [PATCH 16/22] Configure Dependabot for npm updates Specify npm as the package ecosystem and allow updates for astro and @astrojs/starlight dependencies. --- .github/dependabot.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..ca89712e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file +version: 2 +updates: + - package-ecosystem: "npm" + directory: "/" + schedule: + interval: "weekly" + allow: + - dependency-name: "astro" + - dependency-name: "@astrojs/starlight" From d24353fa1dc16815d29bee0ad40f081d89dce4f6 Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Thu, 15 Jan 2026 19:51:08 +0100 Subject: [PATCH 17/22] Change Dependabot update interval to monthly --- .github/dependabot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index ca89712e..d0febe6f 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,7 +7,7 @@ updates: - package-ecosystem: "npm" directory: "/" schedule: - interval: "weekly" + interval: "monthly" allow: - dependency-name: "astro" - dependency-name: "@astrojs/starlight" From 259bf69272270a89736684b4e045f13f1b7f2886 Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Thu, 15 Jan 2026 20:16:32 +0100 Subject: [PATCH 18/22] Refactor LanguageTool workflow for reviewdog integration --- .github/workflows/languagetool.yml | 304 ++--------------------------- 1 file changed, 12 insertions(+), 292 deletions(-) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index 5a842a30..307462d4 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -1,295 +1,15 @@ -# .github/workflows/languagetool.yml -name: LanguageTool (PR comment) - -on: - pull_request_target: - types: [opened, reopened, synchronize, labeled] - issue_comment: - types: [created] - -permissions: - contents: read - pull-requests: write - issues: write - -concurrency: - group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} - cancel-in-progress: true - -env: - LT_LANGUAGE: en-US - RERUN_LABEL: languagetool:rerun - +name: reviewdog +on: [pull_request] jobs: - # Comment command -> toggles a label to trigger the PR job - rerun_on_comment: - if: | - github.event_name == 'issue_comment' && - github.event.issue.pull_request && - startsWith(github.event.comment.body, '/languagetool') && - (github.event.comment.author_association == 'MEMBER' || - github.event.comment.author_association == 'OWNER' || - github.event.comment.author_association == 'COLLABORATOR') + linter_name: + name: LanguageTool grammar check runs-on: ubuntu-latest steps: - - name: Toggle rerun label on PR - uses: actions/github-script@v7 - with: - script: | - const owner = context.repo.owner; - const repo 
= context.repo.repo; - const issue_number = context.issue.number; - const label = process.env.RERUN_LABEL; - - // Ensure label exists - try { - await github.rest.issues.getLabel({ owner, repo, name: label }); - } catch { - await github.rest.issues.createLabel({ - owner, repo, name: label, color: '0e8a16', - description: 'Rerun LanguageTool on this PR' - }); - } - - // Remove if present (ignore errors), then add to force a new "labeled" event - try { - await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label }); - } catch {} - await github.rest.issues.addLabels({ owner, repo, issue_number, labels: [label] }); - - languagetool: - if: | - github.event_name == 'pull_request_target' && - ( - github.event.action == 'opened' || - github.event.action == 'reopened' || - github.event.action == 'synchronize' || - (github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun') - ) - runs-on: ubuntu-latest - - steps: - - name: Checkout PR head (safe) - uses: actions/checkout@v4 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 0 - persist-credentials: false - - - name: Fetch base SHA for diff - run: | - set -euo pipefail - git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}.git" || true - git fetch --no-tags --depth=1 upstream "${{ github.event.pull_request.base.sha }}" - - - name: Setup Java 17 - uses: actions/setup-java@v4 - with: - distribution: temurin - java-version: "17" - - - name: Download LanguageTool CLI (latest snapshot) - run: | - set -euo pipefail - curl -fsSL -o lt.zip "https://internal1.languagetool.org/snapshots/LanguageTool-latest-snapshot.zip" - rm -rf .lt - mkdir -p .lt - unzip -q lt.zip -d .lt - - LT_JAR="$(ls -1 .lt/**/languagetool-commandline.jar 2>/dev/null | head -n1 || true)" - if [ -z "${LT_JAR}" ]; then - echo "Could not find languagetool-commandline.jar in snapshot" >&2 - find .lt -maxdepth 4 -type f -name "*languagetool*jar" -print >&2 || true - exit 1 - fi - - echo "LT_JAR=${LT_JAR}" >> "$GITHUB_ENV" - - - name: Run LanguageTool + build PR comment (collapsible + exact word) - env: - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: | - set -euo pipefail - - # jq is present on ubuntu-latest, but install if your runner image differs - command -v jq >/dev/null || (sudo apt-get update && sudo apt-get install -y jq) - - # Choose files to check - mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \ - | grep -E '\.(md|mdx|txt|rst|adoc|asciidoc|tex)$' || true) - - # Load custom words (optional) - WORDS_FILE=".languagetool/words.txt" - if [ -f "$WORDS_FILE" ]; then - WORDS_JSON="$(jq -R -s ' - split("\n") - | map(gsub("\r";"")) - | map(select(length>0 and (startswith("#")|not))) - | map(ascii_downcase) - ' "$WORDS_FILE")" - else - WORDS_JSON='[]' - fi - - : > results.jsonl - - if [ "${#FILES[@]}" -eq 0 ]; then - echo '{"file":"(none)","issues":[]}' >> results.jsonl - else - for f in "${FILES[@]}"; do - [ -f "$f" ] || continue - - # LT can print banner lines; keep JSON only (accepts either { or [) - java -jar "$LT_JAR" -l "${LT_LANGUAGE}" --json "$f" 2>/dev/null \ - | sed -n '/^[{[]/,$p' > lt.json || true - - # Extract issues and filter spelling-ish matches for custom words - jq -c \ - --arg file "$f" \ - --argjson words "$WORDS_JSON" ' - def bad_raw: - (.context.offset // 0) as $o - | (.context.length // 0) as $l - | (.context.text 
// "") as $t - | ($t[$o:($o+$l)]); - - def badtoken: - (bad_raw - | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"") - | ascii_downcase); - - (.matches // []) - | map( - . as $m - | ($m.rule.id // "") as $rid - | ($m.rule.category.id // "") as $cat - | (badtoken) as $bt - | select( - ( (( $cat == "TYPOS") or ($rid|test("MORFOLOGIK";"i")) ) - and (($words|index($bt)) != null) - ) | not - ) - | { - message: ($m.message // "LanguageTool finding"), - rule: $rid, - bad: (bad_raw), - replacements: (($m.replacements // []) | map(.value) | .[0:3]), - context: ($m.context.text // ""), - context_offset: ($m.context.offset // 0), - context_length: ($m.context.length // 0) - } - ) - | {file:$file, issues:.} - ' lt.json >> results.jsonl - done - fi - - # Build markdown body (stored as a file) - collapsible per file - node <<'NODE' - const fs = require("fs"); - - const marker = ""; - const raw = fs.readFileSync("results.jsonl","utf8").trim(); - const lines = raw ? raw.split("\n").filter(Boolean) : []; - const parsed = lines.map(l => JSON.parse(l)); - - const checkedFiles = parsed.map(p => p.file).filter(f => f && f !== "(none)"); - const byFile = parsed - .filter(p => Array.isArray(p.issues) && p.issues.length > 0) - .reduce((acc, p) => { acc[p.file] = p.issues; return acc; }, {}); - - let total = 0; - for (const f of Object.keys(byFile)) total += byFile[f].length; - - function inlineCode(s) { - if (s == null) return ""; - return String(s).replace(/`/g, "\\`").replace(/\n/g, " ").trim(); - } - - function shortContext(text, maxLen=220) { - const t = (text || "").replace(/\s+/g, " ").trim(); - if (t.length <= maxLen) return t; - return t.slice(0, maxLen - 1) + "…"; - } - - let body = - `${marker} - ## LanguageTool report - - **Language:** \`${process.env.LT_LANGUAGE || "en-US"}\` - **Checked files:** ${checkedFiles.length} - **Findings:** ${total} - `; - - if (!checkedFiles.length) { - body += `\nNo supported text files changed in this PR (based on configured extensions).\n`; - } else if (total === 0) { - body += `\n✅ No issues found in the changed text files.\n`; - } else { - body += `\n---\n`; - for (const [file, issues] of Object.entries(byFile)) { - body += `\n
\n${file} — ${issues.length} finding(s)\n\n`; - for (const it of issues.slice(0, 200)) { - const found = inlineCode(it.bad); - const ctx = shortContext(it.context); - const sug = (it.replacements && it.replacements.length) - ? `Suggested: ${it.replacements.map(s => `\`${inlineCode(s)}\``).join(", ")}\n` - : ""; - body += - `- **${inlineCode(it.rule || "RULE")}**: ${inlineCode(it.message)} - - Found: \`${found}\` - - ${sug ? sug.trimEnd() : "Suggested: (none)"} - - Context: ${ctx ? `> ${ctx}` : "(none)"} - - `; - } - if (issues.length > 200) body += `…(${issues.length - 200} more in this file)\n\n`; - body += `
\n`; - } - } - - fs.writeFileSync("comment.md", body.trim() + "\n"); - NODE - - - name: Post or update PR comment - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const owner = context.repo.owner; - const repo = context.repo.repo; - const issue_number = context.payload.pull_request.number; - - const body = fs.readFileSync('comment.md', 'utf8'); - const marker = ''; - - const { data: comments } = await github.rest.issues.listComments({ - owner, repo, issue_number, per_page: 100 - }); - - const existing = comments.find(c => (c.body || '').includes(marker)); - - if (existing) { - await github.rest.issues.updateComment({ - owner, repo, comment_id: existing.id, body - }); - } else { - await github.rest.issues.createComment({ - owner, repo, issue_number, body - }); - } - - - name: Remove rerun label - if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' - continue-on-error: true - uses: actions/github-script@v7 - with: - script: | - await github.rest.issues.removeLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.payload.pull_request.number, - name: process.env.RERUN_LABEL, - }); + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: reviewdog/action-languagetool@ea19c757470ce0dbfcbc34aec090317cef1ff0b5 # v1.22.0 + with: + github_token: ${{ secrets.github_token }} + # Change reviewdog reporter if you need [github-pr-check,github-check,github-pr-review]. + reporter: github-pr-review + # Change reporter level if you need. + level: info From a5900aa00c5fc367529c85ebf292c26118c959ff Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Thu, 15 Jan 2026 20:22:48 +0100 Subject: [PATCH 19/22] Update languagetool.yml --- .github/workflows/languagetool.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index 307462d4..3ce298f1 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -13,3 +13,4 @@ jobs: reporter: github-pr-review # Change reporter level if you need. level: info + patterns: "**/*.md **/*.mdx **/*.txt" From c22aacd228fad6eed288b631f7184e1761860ea6 Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Mon, 19 Jan 2026 21:41:27 +0100 Subject: [PATCH 20/22] Update GitHub Actions workflow for grammar checks harper --- .github/workflows/languagetool.yml | 92 ++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 12 deletions(-) diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml index 3ce298f1..9666f661 100644 --- a/.github/workflows/languagetool.yml +++ b/.github/workflows/languagetool.yml @@ -1,16 +1,84 @@ -name: reviewdog -on: [pull_request] +name: Harper (grammar suggestions) + +on: + pull_request: + +permissions: + contents: read + pull-requests: write + jobs: - linter_name: - name: LanguageTool grammar check + harper: runs-on: ubuntu-latest + steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: reviewdog/action-languagetool@ea19c757470ce0dbfcbc34aec090317cef1ff0b5 # v1.22.0 + - uses: actions/checkout@v4 with: - github_token: ${{ secrets.github_token }} - # Change reviewdog reporter if you need [github-pr-check,github-check,github-pr-review]. - reporter: github-pr-review - # Change reporter level if you need. 
- level: info - patterns: "**/*.md **/*.mdx **/*.txt" + fetch-depth: 0 + + - name: Install Harper + run: | + curl -fsSL https://raw.githubusercontent.com/automattic/harper/main/install.sh | sh + echo "$HOME/.harper/bin" >> $GITHUB_PATH + + - name: Run Harper on changed md/mdx + id: harper + run: | + set -euo pipefail + + mapfile -t FILES < <( + git diff --name-only origin/${{ github.base_ref }}...${{ github.sha }} \ + -- '*.md' '*.mdx' || true + ) + + if [ ${#FILES[@]} -eq 0 ]; then + echo "No markdown files changed." > harper.txt + exit 0 + fi + + { + echo "## Harper grammar suggestions" + echo + for f in "${FILES[@]}"; do + [ -f "$f" ] || continue + echo "### $f" + harper "$f" || true + echo + done + } > harper.txt + + - name: Post PR comment + uses: actions/github-script@v7 + with: + script: | + const fs = require("fs"); + const body = fs.readFileSync("harper.txt", "utf8").trim(); + + const marker = ""; + const commentBody = `${marker}\n${body || "No issues found."}`; + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const existing = comments.find(c => + c.body && c.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: commentBody, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: commentBody, + }); + } From 8d6076464a2458dfc873d0a0a24b0368bef8984c Mon Sep 17 00:00:00 2001 From: Alice <36531905+CookieSource@users.noreply.github.com> Date: Wed, 21 Jan 2026 19:55:21 +0100 Subject: [PATCH 21/22] Update and rename languagetool.yml to checkspelling.yml --- .github/workflows/checkspelling.yml | 154 ++++++++++++++++++++++++++++ .github/workflows/languagetool.yml | 84 --------------- 2 files changed, 154 insertions(+), 84 deletions(-) create mode 100644 .github/workflows/checkspelling.yml delete mode 100644 .github/workflows/languagetool.yml diff --git a/.github/workflows/checkspelling.yml b/.github/workflows/checkspelling.yml new file mode 100644 index 00000000..d8ec2885 --- /dev/null +++ b/.github/workflows/checkspelling.yml @@ -0,0 +1,154 @@ +name: Harper (grammar + spelling) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +permissions: + contents: read + pull-requests: write + +jobs: + harper: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Install harper-cli + run: | + cargo install --locked --git https://github.com/Automattic/harper.git harper-cli + + - name: Write Harper dictionary + env: + XDG_CONFIG_HOME: ${{ github.workspace }}/.xdg + run: | + mkdir -p "$XDG_CONFIG_HOME/harper-ls" + cat > "$XDG_CONFIG_HOME/harper-ls/dictionary.txt" <<'EOF' + AerynOS + astrojs + sha256sum + SHA256 + certutil + hashfile + lastUpdated + EOF + + - name: Run Harper on PR-changed files + id: harper + env: + XDG_CONFIG_HOME: ${{ github.workspace }}/.xdg + run: | + set -euo pipefail + + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ 
github.event.pull_request.head.sha }}" + + mapfile -t FILES < <(git diff --name-only "$BASE" "$HEAD" -- \ + '*.md' '*.mdx' '*.txt' || true) + + : > harper-report.txt + + if [ "${#FILES[@]}" -eq 0 ]; then + echo "No matching files changed (.md/.mdx/.txt)." > harper-report.txt + echo "fail=0" >> "$GITHUB_OUTPUT" + exit 0 + fi + + fail=0 + for f in "${FILES[@]}"; do + if [ ! -f "$f" ]; then + continue + fi + + echo "===== $f =====" >> harper-report.txt + echo >> harper-report.txt + + out="$(harper-cli lint "$f" || true)" + echo "$out" >> harper-report.txt + echo >> harper-report.txt + + after="$(printf '%s\n' "$out" | sed -n 's/.*after overlap removal, \([0-9]\+\) after.*/\1/p' | tail -n 1)" + after="${after:-0}" + + if [ "$after" -ne 0 ]; then + fail=1 + fi + done + + echo "fail=$fail" >> "$GITHUB_OUTPUT" + + - name: Comment on PR with Harper output + if: github.event.pull_request + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + const raw = fs.readFileSync('harper-report.txt', 'utf8'); + + const limit = 65000; + const clipped = + raw.length > limit + ? raw.slice(0, limit) + "\n\n[truncated]\n" + : raw; + + const marker = ''; + const body = + `${marker}\n` + + `
\n` + + `Harper output\n\n` + + "```text\n" + + clipped + + "\n```\n" + + `
\n`;
+
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+
+            const comments = await github.rest.issues.listComments({
+              owner,
+              repo,
+              issue_number,
+            });
+
+            const existing = comments.data.find(c =>
+              c.body && c.body.includes(marker)
+            );
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner,
+                repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner,
+                repo,
+                issue_number,
+                body,
+              });
+            }
+
+      - name: Fail if Harper found issues
+        if: steps.harper.outputs.fail == '1'
+        run: exit 1
diff --git a/.github/workflows/languagetool.yml b/.github/workflows/languagetool.yml
deleted file mode 100644
index 9666f661..00000000
--- a/.github/workflows/languagetool.yml
+++ /dev/null
@@ -1,84 +0,0 @@
-name: Harper (grammar suggestions)
-
-on:
-  pull_request:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-jobs:
-  harper:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install Harper
-        run: |
-          curl -fsSL https://raw.githubusercontent.com/automattic/harper/main/install.sh | sh
-          echo "$HOME/.harper/bin" >> $GITHUB_PATH
-
-      - name: Run Harper on changed md/mdx
-        id: harper
-        run: |
-          set -euo pipefail
-
-          mapfile -t FILES < <(
-            git diff --name-only origin/${{ github.base_ref }}...${{ github.sha }} \
-              -- '*.md' '*.mdx' || true
-          )
-
-          if [ ${#FILES[@]} -eq 0 ]; then
-            echo "No markdown files changed." > harper.txt
-            exit 0
-          fi
-
-          {
-            echo "## Harper grammar suggestions"
-            echo
-            for f in "${FILES[@]}"; do
-              [ -f "$f" ] || continue
-              echo "### $f"
-              harper "$f" || true
-              echo
-            done
-          } > harper.txt
-
-      - name: Post PR comment
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require("fs");
-            const body = fs.readFileSync("harper.txt", "utf8").trim();
-
-            const marker = "";
-            const commentBody = `${marker}\n${body || "No issues found."}`;
-
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: context.issue.number,
-            });
-
-            const existing = comments.find(c =>
-              c.body && c.body.includes(marker)
-            );
-
-            if (existing) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existing.id,
-                body: commentBody,
-              });
-            } else {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: context.issue.number,
-                body: commentBody,
-              });
-            }
From 4aa2f92b8a6d2e5c5060267783d39dfc889c24a7 Mon Sep 17 00:00:00 2001
From: Alice <36531905+CookieSource@users.noreply.github.com>
Date: Wed, 21 Jan 2026 19:56:37 +0100
Subject: [PATCH 22/22] Update index.mdx

---
 src/content/docs/Users/System Management/index.mdx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/content/docs/Users/System Management/index.mdx b/src/content/docs/Users/System Management/index.mdx
index 7b74c1a3..f78ab147 100644
--- a/src/content/docs/Users/System Management/index.mdx
+++ b/src/content/docs/Users/System Management/index.mdx
@@ -8,3 +8,5 @@ import DirectoryList from '@components/DirectoryList.astro';
 
 Use this section to manage an installed system, from understanding where configuration lives to operating moss states safely.
+
+ There may be multiple versions available with different desktop environments, denoted by some person that wasn't present today.
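
For reference, the final checkspelling.yml job above can be approximated locally before opening a PR. The sketch below is an assumption-laden outline, not part of the patch series: it reuses the same harper-cli install method, file globs, and the "after overlap removal, N after" summary line that the workflow parses, but the wrapper script itself, its base-branch argument, and its pass/fail handling are illustrative only.

  #!/usr/bin/env bash
  # Rough local approximation of the checkspelling.yml job (hypothetical helper;
  # pass your target branch as the first argument, default origin/main).
  set -euo pipefail

  BASE="${1:-origin/main}"

  # Same toolchain the workflow installs.
  command -v harper-cli >/dev/null || \
    cargo install --locked --git https://github.com/Automattic/harper.git harper-cli

  # Same file selection as the workflow: changed .md/.mdx/.txt files.
  mapfile -t FILES < <(git diff --name-only "$BASE"...HEAD -- '*.md' '*.mdx' '*.txt' || true)

  if [ "${#FILES[@]}" -eq 0 ]; then
    echo "No matching files changed (.md/.mdx/.txt)."
    exit 0
  fi

  # Note: the workflow also seeds a custom dictionary at
  # $XDG_CONFIG_HOME/harper-ls/dictionary.txt; mirror that locally if needed.
  fail=0
  for f in "${FILES[@]}"; do
    [ -f "$f" ] || continue
    echo "===== $f ====="
    out="$(harper-cli lint "$f" || true)"
    printf '%s\n' "$out"
    # Mirror the workflow: read the "after overlap removal, N after" summary
    # line to decide pass/fail.
    after="$(printf '%s\n' "$out" | sed -n 's/.*after overlap removal, \([0-9]\+\) after.*/\1/p' | tail -n 1)"
    if [ "${after:-0}" -ne 0 ]; then
      fail=1
    fi
  done

  exit "$fail"

Like the workflow, this treats any nonzero lint count as a failure; it is only a convenience sketch and does not replace the PR check itself.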