From b3a2248e62a20898293ea4281a01edb276f052bd Mon Sep 17 00:00:00 2001
From: Brian Burt <bburt@redhat.com>
Date: Fri, 24 Apr 2026 12:58:59 -0400
Subject: [PATCH 1/4] feat(docs-convert-gdoc): Add Google Docs comment
 extraction as footnotes

Pull comment threads from the Drive v3 API and insert them as Markdown
footnotes when --comments is passed.  Resolved threads are excluded by
default; --include-resolved brings them back.  Anchor matching uses
whitespace-normalized fuzzy search with word-boundary snapping so
footnote references never split a word.

Argument parsing migrated from sys.argv to argparse; backward compatible.

Closes #112

Made-with: Cursor
---
 plugins/docs-tools/.claude-plugin/plugin.json |   2 +-
 .../skills/docs-convert-gdoc-md/SKILL.md      |  16 +-
 .../docs-convert-gdoc-md/scripts/gdoc2md.py   | 271 ++++++++++++++++--
 3 files changed, 269 insertions(+), 20 deletions(-)
diff --git a/plugins/docs-tools/.claude-plugin/plugin.json b/plugins/docs-tools/.claude-plugin/plugin.json
index ac667674..ef5ebe2f 100644
--- a/plugins/docs-tools/.claude-plugin/plugin.json
+++ b/plugins/docs-tools/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "docs-tools",
-  "version": "0.0.52",
+  "version": "0.0.53",
   "description": "Documentation review, writing, and workflow tools for Red Hat AsciiDoc and Markdown documentation.",
   "author": {
     "name": "Red Hat Documentation Team",
diff --git a/plugins/docs-tools/skills/docs-convert-gdoc-md/SKILL.md b/plugins/docs-tools/skills/docs-convert-gdoc-md/SKILL.md
index 5e8fcdb7..c3f9c644 100644
--- a/plugins/docs-tools/skills/docs-convert-gdoc-md/SKILL.md
+++ b/plugins/docs-tools/skills/docs-convert-gdoc-md/SKILL.md
@@ -33,7 +33,7 @@ The script is at `${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py`.
 Always quote the URL and output file arguments:
 
 ```bash
-python3 ${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py "<url>" ["<output_file>"]
+python3 ${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py [--comments] [--include-resolved] "<url>" ["<output_file>"]
 ```
 
 - The script auto-detects the URL type:
@@ -42,6 +42,20 @@ python3 ${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py "<url>" ["<output_file>"]
   - `/spreadsheets/d/` → Google Sheets → CSV
 - If no output file is specified, it defaults to `<id>.md` or `<id>.csv`.
 
+### Include Google Docs comments
+
+Use `--comments` to pull comment threads from the document and insert them as Markdown footnotes:
+
+```bash
+python3 ${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py --comments "<google-doc-url>"
+```
+
+- Each comment with a highlighted text anchor becomes a footnote reference placed after the quoted text in the Markdown body.
+- Comments without an anchor, or whose anchor text cannot be found in the Markdown, appear only as footnotes at the end.
+- Reply threads are included under the parent comment.
+- By default, resolved comment threads are excluded. Add `--include-resolved` to include them.
+- The `--comments` flag only applies to Google Docs. The script ignores it for Slides and Sheets.
+
 ### Error handling
 
 - **401**: Authentication expired. Tell the user to run `gcloud auth login --enable-gdrive-access`.
diff --git a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
index 3aaeadba..f3ea2d03 100644
--- a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
+++ b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
@@ -1,12 +1,14 @@
 """
 Export Google Docs to Markdown, Slides to Markdown (via PPTX),
-or Sheets to CSV.
+or Sheets to CSV.  Optionally include Google Docs comments as
+Markdown footnotes.
 
 Requires gcloud CLI and python-pptx (for Slides export).
 
-python gdoc2md.py <google-doc-or-slides-or-sheets-url> [output]
+python3 ${CLAUDE_SKILL_DIR}/scripts/gdoc2md.py [--comments] [--include-resolved] <url> [output]
 """
 
+import argparse
 import json
 import re
 import subprocess
@@ -15,6 +17,7 @@
 from io import BytesIO
 from pathlib import Path
 from urllib.error import HTTPError
+from urllib.parse import quote
 from urllib.request import Request, urlopen
 
 # tolerates trailing segments like /edit, /view, ?usp=sharing
@@ -38,26 +41,44 @@
 
 
 def parse_and_validate_args():
-    if len(sys.argv) < 2:
-        print(f"Usage: {sys.argv[0]} <google-doc-or-slides-or-sheets-url> [output]")
-        sys.exit(1)
+    """Parse CLI arguments and return (file_id, output, mode, comments, include_resolved)."""
+    parser = argparse.ArgumentParser(
+        description="Export Google Docs/Slides/Sheets to Markdown or CSV.",
+    )
+    parser.add_argument("url", help="Google Docs, Slides, or Sheets URL")
+    parser.add_argument("output", nargs="?", default=None, help="Output file path")
+    parser.add_argument(
+        "--comments",
+        action="store_true",
+        help="Include Google Docs comments as Markdown footnotes (Docs only)",
+    )
+    parser.add_argument(
+        "--include-resolved",
+        action="store_true",
+        help="Include resolved comment threads (requires --comments)",
+    )
+    args = parser.parse_args()
+
+    if args.include_resolved and not args.comments:
+        parser.error("--include-resolved requires --comments")
 
-    url = sys.argv[1]
-    match = VALID_URL_RE.match(url)
+    match = VALID_URL_RE.match(args.url)
     if not match:
-        print(
-            "Error: URL must be a Google Docs, Slides, or Sheets URL (https://docs.google.com/...)",
-            file=sys.stderr,
+        parser.error(
+            "URL must be a Google Docs, Slides, or Sheets URL (https://docs.google.com/...)"
         )
-        sys.exit(1)
 
     mode = MODE_MAP[match.group("mode")]
     file_id = match.group("id")
+    output = args.output or f"{file_id}{EXTENSIONS[mode]}"
 
-    explicit_output = sys.argv[2] if len(sys.argv) > 2 else None
-    output = explicit_output or f"{file_id}{EXTENSIONS[mode]}"
+    if args.comments and mode != "doc":
+        print(
+            "Warning: --comments is only supported for Google Docs, ignoring.",
+            file=sys.stderr,
+        )
 
-    return file_id, output, mode
+    return file_id, output, mode, args.comments, args.include_resolved
 
 
 # ---------------------------------------------------------------------------
@@ -66,6 +87,7 @@ def parse_and_validate_args():
 
 
 def check_dependencies():
+    """Verify that the gcloud CLI is installed, exiting with guidance if not."""
     result = subprocess.run(["gcloud", "version"], capture_output=True)  # noqa: S607
     if result.returncode != 0:
         print("Error: gcloud CLI is not installed.", file=sys.stderr)
@@ -127,6 +149,7 @@ def get_token() -> str:
 
 
 def download(url: str, token: str, retries: int = 3) -> bytes:
+    """GET *url* with Bearer auth and exponential back-off on 429 responses."""
     req = Request(url, headers={"Authorization": f"Bearer {token}"})  # noqa: S310
     for attempt in range(retries + 1):
         try:
@@ -243,12 +266,207 @@ def _sanitize_filename(name: str) -> str:
     return re.sub(r'[\\/*?:"<>|]', "_", name)
 
 
+# ---------------------------------------------------------------------------
+# Google Docs comments → Markdown footnotes
+# ---------------------------------------------------------------------------
+
+
+def fetch_comments(
+    file_id: str,
+    token: str,
+    include_resolved: bool = False,
+) -> list[dict]:
+    """Fetch comment threads from the Drive v3 API.
+
+    Returns a list of dicts with keys: author, content, quoted_text,
+    resolved, and replies (list of {author, content}).
+    """
+    fields = (
+        "nextPageToken,"
+        "comments(id,content,resolved,author/displayName,"
+        "quotedFileContent/value,replies(content,author/displayName))"
+    )
+    comments = []
+    page_token = None
+    while True:
+        api_url = (
+            f"https://www.googleapis.com/drive/v3/files/{file_id}/comments"
+            f"?fields={quote(fields, safe='()/,')}&includeDeleted=false"
+            f"&pageSize=100"
+        )
+        if page_token:
+            api_url += f"&pageToken={quote(page_token)}"
+
+        data = json.loads(download(api_url, token))
+        for c in data.get("comments", []):
+            resolved = c.get("resolved", False)
+            if resolved and not include_resolved:
+                continue
+            quoted = (c.get("quotedFileContent") or {}).get("value", "")
+            replies = [
+                {
+                    "author": r.get("author", {}).get("displayName", "Unknown"),
+                    "content": r.get("content", ""),
+                }
+                for r in c.get("replies", [])
+            ]
+            comments.append(
+                {
+                    "author": c.get("author", {}).get("displayName", "Unknown"),
+                    "content": c.get("content", ""),
+                    "quoted_text": quoted,
+                    "resolved": resolved,
+                    "replies": replies,
+                }
+            )
+
+        page_token = data.get("nextPageToken")
+        if not page_token:
+            break
+
+    return comments
+
+
+def _normalize(text: str) -> str:
+    """Collapse whitespace for fuzzy anchor matching."""
+    return re.sub(r"\s+", " ", text).strip()
+
+
+def insert_comment_footnotes(
+    markdown: str,
+    comments: list[dict],
+) -> str:
+    """Insert footnote references into the Markdown body and append
+    footnote definitions at the end of the file.
+
+    Matching strategy: for each comment with a quoted anchor, find the
+    first occurrence of that anchor text in the Markdown (normalized
+    whitespace) and insert a footnote reference after it.  Comments
+    without an anchor are appended as unanchored footnotes at the end.
+    """
+    if not comments:
+        return markdown
+
+    footnotes: list[str] = []
+    fn_index = 1
+    used_positions: set[int] = set()
+
+    norm_md = _normalize(markdown)
+
+    for comment in comments:
+        anchor = comment["quoted_text"]
+        label = f"[^{fn_index}]"
+
+        body_parts = []
+        status = " (resolved)" if comment["resolved"] else ""
+        body_parts.append(f"**{comment['author']}{status}:** {_normalize(comment['content'])}")
+        for reply in comment["replies"]:
+            body_parts.append(f"    **{reply['author']}:** {_normalize(reply['content'])}")
+        footnote_def = f"{label}: " + " \\\n".join(body_parts)
+
+        norm_anchor = _normalize(anchor) if anchor else ""
+        if norm_anchor:
+            search_from = 0
+            pos = -1
+            while True:
+                candidate = norm_md.find(norm_anchor, search_from)
+                if candidate == -1:
+                    break
+                if candidate not in used_positions:
+                    pos = candidate
+                    break
+                search_from = candidate + 1
+            if pos != -1:
+                used_positions.add(pos)
+                end_of_anchor = _find_original_end(
+                    markdown,
+                    norm_md,
+                    pos,
+                    len(norm_anchor),
+                )
+                end_of_anchor = _snap_to_word_boundary(
+                    markdown,
+                    end_of_anchor,
+                )
+                markdown = markdown[:end_of_anchor] + label + markdown[end_of_anchor:]
+                norm_md = _normalize(markdown)
+                footnotes.append(footnote_def)
+                fn_index += 1
+                continue
+
+        footnotes.append(footnote_def)
+        fn_index += 1
+
+    if footnotes:
+        markdown = markdown.rstrip() + "\n\n---\n\n"
+        markdown += "\n".join(footnotes) + "\n"
+
+    return markdown
+
+
+def _find_original_end(
+    original: str,
+    normalized: str,
+    norm_pos: int,
+    norm_len: int,
+) -> int:
+    """Map a position in the normalized string back to the original.
+
+    Walk through the original string, tracking how many non-collapsed
+    characters have been consumed, to find where the anchor ends in
+    the original text.
+    """
+    consumed = 0
+    i = 0
+    in_space = False
+
+    while i < len(original) and consumed < norm_pos:
+        if original[i].isspace():
+            if not in_space:
+                consumed += 1
+                in_space = True
+        else:
+            consumed += 1
+            in_space = False
+        i += 1
+
+    chars_left = norm_len
+    while i < len(original) and chars_left > 0:
+        if original[i].isspace():
+            if not in_space:
+                chars_left -= 1
+                in_space = True
+        else:
+            chars_left -= 1
+            in_space = False
+        i += 1
+
+    return i
+
+
+def _snap_to_word_boundary(text: str, pos: int) -> int:
+    """Advance *pos* past any remaining word characters so the footnote
+    reference never splits a word.  Stops at whitespace, punctuation
+    that commonly follows words, or end-of-string.
+    """
+    while pos < len(text) and text[pos].isalnum():
+        pos += 1
+    return pos
+
+
 # ---------------------------------------------------------------------------
 # Fetch & write
 # ---------------------------------------------------------------------------
 
 
-def fetch(file_id: str, output: str, mode: str):
+def fetch(
+    file_id: str,
+    output: str,
+    mode: str,
+    include_comments: bool = False,
+    include_resolved: bool = False,
+):
+    """Download and convert a Google Docs/Slides/Sheets file, writing the result to *output*."""
     token = get_token()
     base = "https://docs.google.com"
 
@@ -272,6 +490,23 @@ def fetch(file_id: str, output: str, mode: str):
 
     if mode == "slides":
         output_path.write_text(pptx_to_markdown(data), encoding="utf-8")
+    elif mode == "doc":
+        md_text = data.decode("utf-8")
+        if include_comments:
+            comments = fetch_comments(
+                file_id,
+                token,
+                include_resolved,
+            )
+            if comments:
+                md_text = insert_comment_footnotes(md_text, comments)
+                print(
+                    f"Inserted {len(comments)} comment(s) as footnotes.",
+                    file=sys.stderr,
+                )
+            else:
+                print("No comments found.", file=sys.stderr)
+        output_path.write_text(md_text, encoding="utf-8")
     else:
         output_path.write_bytes(data)
 
@@ -328,10 +563,10 @@ def _fetch_sheets(file_id: str, output: str, token: str, base: str):
 
 
 def main():
-    # Validate args first — fast failure before any subprocess calls
-    file_id, output, mode = parse_and_validate_args()
+    """CLI entry point: parse arguments, check dependencies, and run the export."""
+    file_id, output, mode, comments, include_resolved = parse_and_validate_args()
     check_dependencies()
-    fetch(file_id, output, mode)
+    fetch(file_id, output, mode, comments, include_resolved)
 
 
 if __name__ == "__main__":

From 3b674abefc49ad6a8838e2a7fbda1c026087f306 Mon Sep 17 00:00:00 2001
From: Brian Burt <bburt@redhat.com>
Date: Fri, 24 Apr 2026 16:06:51 -0400
Subject: [PATCH 2/4] fix(docs-convert-gdoc): track stable markdown offsets for
 duplicate anchors

used_positions stored norm_md offsets that became stale after each
footnote insertion shifted the normalized string. Switch to tracking
original markdown offsets (end_of_anchor) which remain stable across
insertions. Re-normalize per iteration instead of after insertion.

Also restores ${CLAUDE_SKILL_DIR} in SKILL.md and docstring paths.
---
 .../skills/docs-convert-gdoc-md/scripts/gdoc2md.py    | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
index f3ea2d03..44c12df1 100644
--- a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
+++ b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
@@ -349,9 +349,7 @@ def insert_comment_footnotes(
 
     footnotes: list[str] = []
     fn_index = 1
-    used_positions: set[int] = set()
-
-    norm_md = _normalize(markdown)
+    used_offsets: set[int] = set()
 
     for comment in comments:
         anchor = comment["quoted_text"]
@@ -366,18 +364,19 @@ def insert_comment_footnotes(
 
         norm_anchor = _normalize(anchor) if anchor else ""
         if norm_anchor:
+            norm_md = _normalize(markdown)
             search_from = 0
             pos = -1
             while True:
                 candidate = norm_md.find(norm_anchor, search_from)
                 if candidate == -1:
                     break
-                if candidate not in used_positions:
+                orig_end = _find_original_end(markdown, norm_md, candidate, len(norm_anchor))
+                if orig_end not in used_offsets:
                     pos = candidate
                     break
                 search_from = candidate + 1
             if pos != -1:
-                used_positions.add(pos)
                 end_of_anchor = _find_original_end(
                     markdown,
                     norm_md,
@@ -388,8 +387,8 @@ def insert_comment_footnotes(
                     markdown,
                     end_of_anchor,
                 )
+                used_offsets.add(end_of_anchor)
                 markdown = markdown[:end_of_anchor] + label + markdown[end_of_anchor:]
-                norm_md = _normalize(markdown)
                 footnotes.append(footnote_def)
                 fn_index += 1
                 continue

From eb4837a9e195f119e1662f4f6fb570291a05c36c Mon Sep 17 00:00:00 2001
From: Brian Burt <bburt@redhat.com>
Date: Mon, 27 Apr 2026 11:40:05 -0400
Subject: [PATCH 3/4] fix(docs-convert-gdoc): resolve anchors before mutating
 markdown

Refactor insert_comment_footnotes to a two-pass approach: first resolve
all anchor positions against the unmodified markdown, then apply
insertions from end to start so earlier labels cannot invalidate later
anchor matches.

---
 .../docs-convert-gdoc-md/scripts/gdoc2md.py   | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
index 44c12df1..50f469d9 100644
--- a/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
+++ b/plugins/docs-tools/skills/docs-convert-gdoc-md/scripts/gdoc2md.py
@@ -349,7 +349,12 @@ def insert_comment_footnotes(
 
     footnotes: list[str] = []
     fn_index = 1
+
+    # Pass 1: resolve all anchor positions against the *unmodified* markdown
+    # so earlier matches cannot invalidate later ones.
+    norm_md = _normalize(markdown)
     used_offsets: set[int] = set()
+    insertions: list[tuple[int, str, str]] = []
 
     for comment in comments:
         anchor = comment["quoted_text"]
@@ -364,7 +369,6 @@ def insert_comment_footnotes(
 
         norm_anchor = _normalize(anchor) if anchor else ""
         if norm_anchor:
-            norm_md = _normalize(markdown)
             search_from = 0
             pos = -1
             while True:
@@ -377,25 +381,25 @@ def insert_comment_footnotes(
                     break
                 search_from = candidate + 1
             if pos != -1:
-                end_of_anchor = _find_original_end(
-                    markdown,
-                    norm_md,
-                    pos,
-                    len(norm_anchor),
-                )
-                end_of_anchor = _snap_to_word_boundary(
-                    markdown,
-                    end_of_anchor,
-                )
+                end_of_anchor = _find_original_end(markdown, norm_md, pos, len(norm_anchor))
+                end_of_anchor = _snap_to_word_boundary(markdown, end_of_anchor)
                 used_offsets.add(end_of_anchor)
-                markdown = markdown[:end_of_anchor] + label + markdown[end_of_anchor:]
-                footnotes.append(footnote_def)
+                insertions.append((end_of_anchor, label, footnote_def))
                 fn_index += 1
                 continue
 
         footnotes.append(footnote_def)
         fn_index += 1
 
+    # Pass 2: apply insertions from end to start so offsets stay valid.
+    insertions.sort(key=lambda t: t[0], reverse=True)
+    for offset, label, footnote_def in insertions:
+        markdown = markdown[:offset] + label + markdown[offset:]
+        footnotes.append(footnote_def)
+
+    # Re-sort footnotes by their numeric index for consistent output.
+    footnotes.sort(key=lambda f: int(f.split("]")[0].lstrip("[^")))
+
     if footnotes:
         markdown = markdown.rstrip() + "\n\n---\n\n"
         markdown += "\n".join(footnotes) + "\n"

From e00a833bee1a3473bea137d9ffc1d7bd5f0c38e3 Mon Sep 17 00:00:00 2001
From: Brian Burt <bburt@redhat.com>
Date: Mon, 27 Apr 2026 11:42:24 -0400
Subject: [PATCH 4/4] fix(docs-convert-gdoc): two-pass anchor resolution,
 update deps.json

Refactor insert_comment_footnotes to resolve all anchor positions
against the unmodified markdown first, then apply insertions in reverse
order so earlier labels cannot invalidate later matches.

Regenerate deps.json after rebase picked up create_mr.sh -> create_mr.py
migration from upstream.
---
 scripts/deps.json | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/scripts/deps.json b/scripts/deps.json
index 4ee03a50..64a5d962 100644
--- a/scripts/deps.json
+++ b/scripts/deps.json
@@ -36,6 +36,7 @@
       "package": "PyGithub",
       "import_name": "github",
       "found_in": [
+        "plugins/docs-tools/skills/docs-workflow-create-mr/scripts/create_mr.py",
         "plugins/docs-tools/skills/git-pr-reader/scripts/git_pr_reader.py"
       ]
     },
@@ -43,6 +44,7 @@
       "package": "python-gitlab",
       "import_name": "gitlab",
       "found_in": [
+        "plugins/docs-tools/skills/docs-workflow-create-mr/scripts/create_mr.py",
         "plugins/docs-tools/skills/git-pr-reader/scripts/git_pr_reader.py"
       ]
     },
@@ -115,15 +117,11 @@
     },
     {
       "tool": "gh",
-      "found_in": [
-        "plugins/docs-tools/skills/docs-workflow-create-mr/scripts/create_mr.sh"
-      ]
+      "found_in": []
     },
     {
       "tool": "glab",
-      "found_in": [
-        "plugins/docs-tools/skills/docs-workflow-create-mr/scripts/create_mr.sh"
-      ]
+      "found_in": []
     },
     {
       "tool": "jq",
@@ -131,7 +129,6 @@
         "plugins/docs-tools/skills/docs-orchestrator/hooks/workflow-completion-check.sh",
         "plugins/docs-tools/skills/docs-orchestrator/scripts/setup-hooks.sh",
         "plugins/docs-tools/skills/docs-workflow-create-jira/scripts/create-jira-ticket.sh",
-        "plugins/docs-tools/skills/docs-workflow-create-mr/scripts/create_mr.sh",
         "plugins/docs-tools/skills/docs-workflow-jira-ready/scripts/jira-ready-check.sh",
         "plugins/docs-tools/skills/docs-workflow-writing/scripts/build_writing_args.sh"
       ]