From 97ba9a4ddd57915b96bdab3ec5709084cfca1db6 Mon Sep 17 00:00:00 2001 From: NiveditJain Date: Wed, 29 Apr 2026 22:00:04 -0700 Subject: [PATCH 1/2] [docs] sanitize JSX attribute quotes in auto-translated MDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The German translator periodically emits `<Tab title="„…"">` where it intends `„…"` typographic quotes but uses ASCII `"` for the closing — the inner straight `"` terminates the JSX attribute and the real attribute close becomes a stray `"` before `>`, which trips `mintlify validate` with `Unexpected character "`. PR #229 fixed this once by hand on `docs/de/dashboard.mdx`. The next auto-translation run regenerated the same broken markup, so the same parse error landed on `main` again after #246. Make it stick: - `scripts/translate-docs/mdx-translator.ts` adds `sanitizeJsxAttributes`, which strips stray trailing ASCII `"` after a JSX attribute close and drops unmatched typographic opening quotes (`„`, `“`, `«`, `‹`, `「`, `『`) inside the same value. Matched pairs (e.g. `「ポリシー」`) are preserved. Wired into `translateMdxPage` ahead of `rewriteInternalLinks`. - `scripts/translate-docs/translator.ts` extends rule #2 of the system prompt to forbid ASCII `"` inside JSX attribute values entirely, so the LLM is less likely to produce the pattern in the first place. - `__tests__/scripts/translate-docs/mdx-translator.test.ts` covers the exact `de/dashboard.mdx` failure plus self-close, multi-attribute, matched typographic pairs, empty-value, and multiple-on-one-line cases. - `docs/de/dashboard.mdx` drops the inner German quotes from the two `<Tab title="…">` attributes (mirrors #229) so CI on `main` goes green immediately rather than waiting for the next translation cycle. 
Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 1 + .../translate-docs/mdx-translator.test.ts | 55 ++++++++++++++++++- docs/de/dashboard.mdx | 4 +- scripts/translate-docs/mdx-translator.ts | 45 ++++++++++++++- scripts/translate-docs/translator.ts | 2 +- 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20c3db03..0aec3c46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Fixes - `failproofai policies --uninstall` interactive CLI selector now says "Remove Hooks" / "Choose where to remove from:" instead of "Install Hooks" / "Choose where to install:" (#236) - README: replace the GitHub Copilot logo with the current canonical mark and add a dark-mode variant (`copilot-light.svg` + `copilot-dark.svg` via ``); the previous SVG used outdated path data with a hard-coded black fill that rendered invisibly on GitHub's dark theme (#236) +- Auto-translated MDX: stop the recurring `mintlify validate` parse error in `docs/de/dashboard.mdx` (``) by adding a `sanitizeJsxAttributes` post-processor to the translation pipeline that strips stray ASCII `"` left after typographic-quote pairs (and any unmatched opening typographic quote) in JSX attribute values, and by tightening the translator system prompt to forbid ASCII `"` inside attribute values. Same regression PR #229 fixed by hand — now it can't recur. Includes the immediate file fix on `docs/de/dashboard.mdx`. 
(#247) ## 0.0.9 — 2026-04-28 diff --git a/__tests__/scripts/translate-docs/mdx-translator.test.ts b/__tests__/scripts/translate-docs/mdx-translator.test.ts index 9f78d842..6841ad4b 100644 --- a/__tests__/scripts/translate-docs/mdx-translator.test.ts +++ b/__tests__/scripts/translate-docs/mdx-translator.test.ts @@ -1,6 +1,9 @@ // @vitest-environment node import { describe, it, expect } from "vitest"; -import { rewriteInternalLinks } from "@/scripts/translate-docs/mdx-translator"; +import { + rewriteInternalLinks, + sanitizeJsxAttributes, +} from "@/scripts/translate-docs/mdx-translator"; describe("rewriteInternalLinks", () => { it("rewrites MDX component href attributes with language prefix", () => { @@ -69,3 +72,53 @@ See [config](/configuration) and [testing](/testing). expect(result).toBe(`[link](/es/getting-started#install)`); }); }); + +describe("sanitizeJsxAttributes", () => { + it("strips stray trailing ASCII quotes after a JSX attribute close", () => { + // The exact failure mode that broke `mintlify validate` on de/dashboard.mdx + const input = ` `; + const result = sanitizeJsxAttributes(input); + expect(result).toBe(` `); + }); + + it("strips trailing extras when attribute is followed by a self-close", () => { + const input = ``; + const result = sanitizeJsxAttributes(input); + expect(result).toBe(``); + }); + + it("strips trailing extras when attribute is followed by another attribute", () => { + const input = ``; + const result = sanitizeJsxAttributes(input); + expect(result).toBe(``); + }); + + it("leaves well-formed attributes untouched", () => { + const input = `\n`; + expect(sanitizeJsxAttributes(input)).toBe(input); + }); + + it("preserves matched typographic quote pairs", () => { + // Japanese 「…」 has matched open/close so should NOT be stripped even if + // there were stray ASCII trailing quotes — though here there are none. 
+ const input = ``; + expect(sanitizeJsxAttributes(input)).toBe(input); + }); + + it("strips unmatched typographic opening quotes when extras are present", () => { + // German „ without a matching " (U+201D) — drop the dangling open + const input = ``; + expect(sanitizeJsxAttributes(input)).toBe(``); + }); + + it("does not mangle empty attributes", () => { + const input = ``; + expect(sanitizeJsxAttributes(input)).toBe(input); + }); + + it("handles multiple malformed attributes on the same line", () => { + const input = ``; + const result = sanitizeJsxAttributes(input); + expect(result).toBe(``); + }); +}); diff --git a/docs/de/dashboard.mdx b/docs/de/dashboard.mdx index 0d0a7629..acf8e188 100644 --- a/docs/de/dashboard.mdx +++ b/docs/de/dashboard.mdx @@ -62,13 +62,13 @@ Sie können die Sitzung als ZIP- oder JSONL-Datei über die Download-Schaltfläc Eine Seite mit zwei Tabs zur Verwaltung von Richtlinien und Einsicht der Aktivitäten. - + - Einzelne Richtlinien mit einem Klick aktivieren oder deaktivieren (schreibt in `~/.failproofai/policies-config.json`) - Eine Richtlinie aufklappen, um ihre Parameter zu konfigurieren (für Richtlinien, die `policyParams` unterstützen) - Hooks für einen bestimmten Scope installieren oder entfernen - Einen benutzerdefinierten Pfad für die Richtliniendatei festlegen - + - Vollständige, seitenweise Übersicht aller Hook-Ereignisse, die über alle Sitzungen hinweg ausgelöst wurden - Filtern nach Entscheidung, Ereignistyp, CLI (Claude Code / OpenAI Codex / GitHub Copilot _(Beta)_), Richtlinienname oder Sitzungs-ID - Jede Zeile zeigt: Zeitstempel, Richtlinienname, Entscheidung, CLI-Badge (orange = Claude Code, lila = OpenAI Codex, blau = GitHub Copilot), Tool-Name, Sitzungs-ID und den Grund für deny/instruct-Entscheidungen diff --git a/scripts/translate-docs/mdx-translator.ts b/scripts/translate-docs/mdx-translator.ts index 8435a798..be5d26bb 100644 --- a/scripts/translate-docs/mdx-translator.ts +++ b/scripts/translate-docs/mdx-translator.ts 
@@ -14,6 +14,46 @@ import type { TranslationResult, TranslationCache } from "./types"; const __dirname = dirname(fileURLToPath(import.meta.url)); const DOCS_DIR = join(__dirname, "..", "..", "docs"); +/** + * Strip stray ASCII `"` that appear right after a JSX attribute's closing + * quote — e.g. ``. The translator sometimes + * wraps an inner phrase in language-specific typographic quotes (`„…"`, + * `「…」`, etc.) but uses an ASCII `"` for the closing instead of the + * proper U+201D, which terminates the attribute and leaves the real + * closing `"` as a stray character that breaks `mintlify validate`. + * + * Also drops unmatched typographic opening quotes inside the same attribute + * value so the rendered title doesn't end with a dangling `„` after we strip + * the extras. + */ +export function sanitizeJsxAttributes(content: string): string { + const openings: Array<[string, string]> = [ + ["„", "“"], // German „ … " + ["“", "”"], // English curly " … " + ["«", "»"], // French « … » + ["‹", "›"], // French single ‹ … › + ["「", "」"], // Japanese 「 … 」 + ["『", "』"], // Japanese 『 … 』 + ]; + return content.replace( + /([a-zA-Z_-]+=")([^"\n]*)"+(?=\s|\/|>)/g, + (match, prefix: string, value: string) => { + // If the original had exactly one closing " (i.e. no extras), + // leave it alone — the regex's `"+` would still match a single + // quote, so we need to re-check the match length to be safe. + const expectedMinLen = `${prefix}${value}"`.length; + if (match.length === expectedMinLen) return match; + let cleaned = value; + for (const [open, close] of openings) { + const opens = cleaned.split(open).length - 1; + const closes = cleaned.split(close).length - 1; + if (opens > closes) cleaned = cleaned.split(open).join(""); + } + return `${prefix}${cleaned}"`; + }, + ); +} + /** * Rewrite internal doc links to include the language prefix. * e.g. 
href="/built-in-policies" -> href="/es/built-in-policies" @@ -94,8 +134,9 @@ export async function translateMdxPage( options.model, ); - // Rewrite internal links - const withLinks = rewriteInternalLinks(translated, lang); + // Strip stray quote artifacts from JSX attribute values, then rewrite links + const sanitized = sanitizeJsxAttributes(translated); + const withLinks = rewriteInternalLinks(sanitized, lang); // Write output mkdirSync(dirname(outputPath), { recursive: true }); diff --git a/scripts/translate-docs/translator.ts b/scripts/translate-docs/translator.ts index c7e1145f..57f4726a 100644 --- a/scripts/translate-docs/translator.ts +++ b/scripts/translate-docs/translator.ts @@ -15,7 +15,7 @@ const SYSTEM_PROMPT = `You are a professional technical documentation translator ## Rules 1. **Preserve all code blocks exactly as-is** — never translate content inside backtick-fenced code blocks (\`\`\`...\`\`\`) or inline code (\`...\`). -2. **Preserve MDX component syntax** — tags like , , , , , , , , , must remain unchanged. Their attribute names (title, icon, href, cols) must remain in English. Only translate the text content of the \`title\` attribute and the text body between tags. +2. **Preserve MDX component syntax** — tags like , , , , , , , , , must remain unchanged. Their attribute names (title, icon, href, cols) must remain in English. Only translate the text content of the \`title\` attribute and the text body between tags. **Never put an ASCII straight \`"\` inside a \`title="…"\` (or any JSX attribute value)** — it terminates the attribute and breaks MDX parsing. If the target language would normally wrap a word in quotation marks (e.g. German „…", Japanese 「…」), drop the inner quotes inside attribute values and rely on the surrounding tag for emphasis. 3. **Preserve YAML frontmatter keys** — only translate the string values of \`title\` and \`description\`. Keep the \`icon\` value unchanged. 4. 
**Preserve all URLs and paths** — never modify href values, image paths, or links. 5. **Preserve Markdown structure** — headers (#, ##), lists (-, *), tables (|), bold (**), italic (*), links ([text](url)) must keep their Markdown formatting. From 897ee503575ce649623c577c84acae4b6db79d23 Mon Sep 17 00:00:00 2001 From: NiveditJain Date: Wed, 29 Apr 2026 22:09:00 -0700 Subject: [PATCH 2/2] [docs] address CodeRabbit review on sanitizeJsxAttributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings on PR #247: 1. mdx-translator.ts:50 — `cleaned.split(open).join("")` removed *every* occurrence of an opener when `opens > closes`, so a value containing one matched typographic pair plus one stray opener (e.g. `„Foo“ und „Bar`) lost the matched pair too. Fix: drop only the surplus = opens - closes openers, scanning from the right with `lastIndexOf` so the leftmost matched pair is preserved. 2. mdx-translator.test.ts — add a regression test for that mixed case (one matched „…“ pair + one dangling „) so the bug above can't recur. Also drop the English curly “…” pair from the openings list. U+201C is both the German closer and the English-curly opener, so processing the English pair after the German pair would strip the very German closer we just preserved. The remaining pairs (German, French ×2, Japanese ×2) all have unambiguous openers. 1177 unit tests pass (was 1176 — the new mixed-case test is the +1). 
Co-Authored-By: Claude Opus 4.7 --- .../translate-docs/mdx-translator.test.ts | 7 +++++++ scripts/translate-docs/mdx-translator.ts | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/__tests__/scripts/translate-docs/mdx-translator.test.ts b/__tests__/scripts/translate-docs/mdx-translator.test.ts index 6841ad4b..dc805bd6 100644 --- a/__tests__/scripts/translate-docs/mdx-translator.test.ts +++ b/__tests__/scripts/translate-docs/mdx-translator.test.ts @@ -111,6 +111,13 @@ describe("sanitizeJsxAttributes", () => { expect(sanitizeJsxAttributes(input)).toBe(``); }); + it("drops only the surplus opener when a matched pair is also present", () => { + // One properly matched „…“ German pair plus one dangling „ — keep the + // pair, strip only the unmatched trailing opener. + const input = ``; + expect(sanitizeJsxAttributes(input)).toBe(``); + }); + it("does not mangle empty attributes", () => { const input = ``; expect(sanitizeJsxAttributes(input)).toBe(input); diff --git a/scripts/translate-docs/mdx-translator.ts b/scripts/translate-docs/mdx-translator.ts index be5d26bb..65b010f7 100644 --- a/scripts/translate-docs/mdx-translator.ts +++ b/scripts/translate-docs/mdx-translator.ts @@ -27,9 +27,13 @@ const DOCS_DIR = join(__dirname, "..", "..", "docs"); * the extras. */ export function sanitizeJsxAttributes(content: string): string { + // Each pair must use an OPENER that is unambiguously an opener — i.e. the + // codepoint never serves as a CLOSER of a different pair. That's why we + // skip English curly “…” (U+201C/U+201D): U+201C is also the German + // closer, so processing English curly after German would strip the very + // German closer we just preserved. 
const openings: Array<[string, string]> = [ ["„", "“"], // German „ … " - ["“", "”"], // English curly " … " ["«", "»"], // French « … » ["‹", "›"], // French single ‹ … › ["「", "」"], // Japanese 「 … 」 @@ -47,7 +51,16 @@ export function sanitizeJsxAttributes(content: string): string { for (const [open, close] of openings) { const opens = cleaned.split(open).length - 1; const closes = cleaned.split(close).length - 1; - if (opens > closes) cleaned = cleaned.split(open).join(""); + // Drop only the surplus unmatched openers, removing from the right. + // A value like `„Foo“ und „Bar` (one matched pair plus one stray + // opener) keeps the leading `„Foo“` intact and only the dangling + // `„Bar` opener gets stripped. + let surplus = opens - closes; + while (surplus-- > 0) { + const i = cleaned.lastIndexOf(open); + if (i < 0) break; + cleaned = cleaned.slice(0, i) + cleaned.slice(i + open.length); + } } return `${prefix}${cleaned}"`; },