From 281bc75dd770c2c16f8bca8f2e1944fc144739dc Mon Sep 17 00:00:00 2001 From: miller-da Date: Mon, 22 Jun 2026 19:36:41 -0500 Subject: [PATCH 1/2] feat: dpm mdx-validate component Signed-off-by: miller-da --- daml.yaml | 8 +- tools/mdx-validate/.gitignore | 7 + tools/mdx-validate/LICENSE | 201 ++++++++ tools/mdx-validate/Makefile | 49 ++ tools/mdx-validate/README.md | 151 ++++++ tools/mdx-validate/cmd/mdx-validate/main.go | 163 +++++++ .../cmd/mdx-validate/main_test.go | 142 ++++++ tools/mdx-validate/component.windows.yaml | 13 + tools/mdx-validate/component.yaml | 12 + tools/mdx-validate/daml.yaml | 6 + tools/mdx-validate/go.mod | 3 + .../mdx-validate/internal/catalog/catalog.go | 288 ++++++++++++ .../internal/catalog/catalog_test.go | 95 ++++ .../mdx-validate/internal/mdxscan/mdxscan.go | 437 ++++++++++++++++++ .../internal/mdxscan/mdxscan_test.go | 157 +++++++ .../internal/validate/components.go | 116 +++++ .../internal/validate/components_test.go | 86 ++++ .../internal/validate/frontmatter.go | 148 ++++++ .../internal/validate/frontmatter_test.go | 127 +++++ .../mdx-validate/internal/validate/images.go | 186 ++++++++ .../internal/validate/images_test.go | 81 ++++ .../mdx-validate/internal/validate/parsed.go | 24 + .../mdx-validate/internal/validate/runner.go | 208 +++++++++ .../internal/validate/runner_test.go | 161 +++++++ .../mdx-validate/internal/validate/shadow.go | 64 +++ .../internal/validate/shadow_test.go | 115 +++++ .../internal/validate/structure.go | 104 +++++ .../internal/validate/structure_test.go | 81 ++++ tools/mdx-validate/internal/validate/types.go | 68 +++ tools/mdx-validate/smoke-test.sh | 72 +++ 30 files changed, 3370 insertions(+), 3 deletions(-) create mode 100644 tools/mdx-validate/.gitignore create mode 100644 tools/mdx-validate/LICENSE create mode 100644 tools/mdx-validate/Makefile create mode 100644 tools/mdx-validate/README.md create mode 100644 tools/mdx-validate/cmd/mdx-validate/main.go create mode 100644 
tools/mdx-validate/cmd/mdx-validate/main_test.go create mode 100644 tools/mdx-validate/component.windows.yaml create mode 100644 tools/mdx-validate/component.yaml create mode 100644 tools/mdx-validate/daml.yaml create mode 100644 tools/mdx-validate/go.mod create mode 100644 tools/mdx-validate/internal/catalog/catalog.go create mode 100644 tools/mdx-validate/internal/catalog/catalog_test.go create mode 100644 tools/mdx-validate/internal/mdxscan/mdxscan.go create mode 100644 tools/mdx-validate/internal/mdxscan/mdxscan_test.go create mode 100644 tools/mdx-validate/internal/validate/components.go create mode 100644 tools/mdx-validate/internal/validate/components_test.go create mode 100644 tools/mdx-validate/internal/validate/frontmatter.go create mode 100644 tools/mdx-validate/internal/validate/frontmatter_test.go create mode 100644 tools/mdx-validate/internal/validate/images.go create mode 100644 tools/mdx-validate/internal/validate/images_test.go create mode 100644 tools/mdx-validate/internal/validate/parsed.go create mode 100644 tools/mdx-validate/internal/validate/runner.go create mode 100644 tools/mdx-validate/internal/validate/runner_test.go create mode 100644 tools/mdx-validate/internal/validate/shadow.go create mode 100644 tools/mdx-validate/internal/validate/shadow_test.go create mode 100644 tools/mdx-validate/internal/validate/structure.go create mode 100644 tools/mdx-validate/internal/validate/structure_test.go create mode 100644 tools/mdx-validate/internal/validate/types.go create mode 100755 tools/mdx-validate/smoke-test.sh diff --git a/daml.yaml b/daml.yaml index 7f7114ee9..1cd7d7f59 100644 --- a/daml.yaml +++ b/daml.yaml @@ -1,3 +1,5 @@ -override-components: - rst-to-mdx: - local-path: ./tools/rst-to-mdx \ No newline at end of file +components: + - name: rst-to-mdx + path: ./tools/rst-to-mdx + - name: mdx-validate + path: ./tools/mdx-validate diff --git a/tools/mdx-validate/.gitignore b/tools/mdx-validate/.gitignore new file mode 100644 index 
000000000..81bf2d17a --- /dev/null +++ b/tools/mdx-validate/.gitignore @@ -0,0 +1,7 @@ +# Compiled binary produced by `make build`. +# Anchored with a leading slash so it only ignores the top-level binary, +# not the cmd/mdx-validate/ source directory of the same name. +/mdx-validate + +# Cross-compile output produced by `make release`. +/dist/ diff --git a/tools/mdx-validate/LICENSE b/tools/mdx-validate/LICENSE new file mode 100644 index 000000000..a3602cbc5 --- /dev/null +++ b/tools/mdx-validate/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright (c) 2022 Digital Asset (Switzerland) GmbH and/or its affiliates + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/mdx-validate/Makefile b/tools/mdx-validate/Makefile new file mode 100644 index 000000000..788e2ce12 --- /dev/null +++ b/tools/mdx-validate/Makefile @@ -0,0 +1,49 @@ +BINARY := mdx-validate +DIST := dist +GO ?= go +OS_ARCHES := darwin/arm64 darwin/amd64 linux/arm64 linux/amd64 windows/amd64 + +.PHONY: all build test fmt vet tidy smoke release clean + +all: build + +build: + $(GO) build -o $(BINARY) ./cmd/mdx-validate + +test: + $(GO) test ./... + +fmt: + $(GO) fmt ./... + +vet: + $(GO) vet ./... + +tidy: + $(GO) mod tidy + +smoke: build + ./smoke-test.sh + +# Cross-compile to dist/-/ for publishing via +# `dpm publish component` (see README). Mirrors rst-to-mdx's release shape. 
+release: + @rm -rf $(DIST) + @for pair in $(OS_ARCHES); do \ + os=$${pair%/*}; arch=$${pair#*/}; \ + out=$(DIST)/$$os-$$arch; \ + mkdir -p $$out; \ + ext=""; if [ "$$os" = "windows" ]; then ext=".exe"; fi; \ + echo ">> $$os/$$arch"; \ + GOOS=$$os GOARCH=$$arch $(GO) build -o $$out/$(BINARY)$$ext ./cmd/mdx-validate; \ + if [ "$$os" = "windows" ]; then \ + cp component.windows.yaml $$out/component.yaml; \ + else \ + cp component.yaml $$out/; \ + fi; \ + cp LICENSE $$out/; \ + done + +clean: + rm -f $(BINARY) + rm -rf $(DIST) diff --git a/tools/mdx-validate/README.md b/tools/mdx-validate/README.md new file mode 100644 index 000000000..021d86284 --- /dev/null +++ b/tools/mdx-validate/README.md @@ -0,0 +1,151 @@ +# mdx-validate + +Validates Mintlify MDX documentation files. The dpm component complement +to `rst-to-mdx`: where the converter emits MDX, `mdx-validate` checks the +MDX in tree before it ships. + +- **Frontmatter — `title:` required.** Errors on MDX files that have no + YAML frontmatter, that have frontmatter without a `title:` field, or + whose `title:` value is empty. +- **Images** - Checks that image references have a matching image. +- **Snippets are skipped.** Files under any `snippets/` directory are + excluded (Mintlify reusable snippets don't have frontmatter by design). +- **`--staged` mode.** Validates only `.mdx` files in git's staged + index — suitable for a pre-commit hook. 
+ +Deferred: +- Internal link / anchor checking via `mintlify broken-links` wrapper +- Heading hierarchy warnings +- External link `--check-external` HTTP probes +- Pre-commit/lefthook config snippets + +## Usage + +```sh +# Validate everything under ./docs-main (the default) +dpm mdx-validate + +# Validate specific files or directories +dpm mdx-validate docs-main/appdev/quickstart.mdx +dpm mdx-validate docs-main/appdev + +# Pre-commit mode: only files in the staged git index +dpm mdx-validate --staged + +# Make warnings blocking +dpm mdx-validate --strict ./docs-main +``` + +## Build + +This component lives in the `tools/` Go workspace. It targets **Go 1.21** +to match the toolchain pinned across the `tools/` components and uses the +standard library only. + +Run make commands from `tools/mdx-validate/`: + +```sh +make build # builds ./mdx-validate +make test # runs unit + runner tests +make smoke # builds, then runs smoke-test.sh +make release # cross-compile to dist/<os>-<arch>/ for publishing +make clean +``` + +`make release` produces one directory per platform under `dist/`, each +containing the binary, a `component.yaml`, and the `LICENSE`. Windows gets +`component.windows.yaml` (renamed to `component.yaml`) because the binary +there is `mdx-validate.exe` — `dpm publish` validates that the manifest's +`path:` resolves to a real file on every platform. + +## Publishing & installing as an OCI component + +dpm components are distributed as OCI artifacts (Open Container Initiative — +the same artifact format used by container registries), one manifest per +`<os>/<arch>`. The commands below require **dpm 3.5.1+** (`dpm publish` and +`dpm tags` do not exist in 3.4.x). + +### 1. 
Publish to an OCI registry + +```sh +make release # cross-compile into dist/<os>-<arch>/ + +dpm publish component oci://<registry>/mdx-validate:<version> \ + -p darwin/arm64=dist/darwin-arm64 \ + -p darwin/amd64=dist/darwin-amd64 \ + -p linux/arm64=dist/linux-arm64 \ + -p linux/amd64=dist/linux-amd64 \ + -p windows/amd64=dist/windows-amd64 +``` + +- The docs tooling registry is `europe-docker.pkg.dev/da-images/public`. +- `--dry-run` validates every per-platform manifest and the required + `LICENSE` **without pushing** — run this first. +- Auth defaults to Docker's `~/.docker/config.json`; override with + `--auth `. `--extra-tags`/`-t` adds tags beyond the semver; + `--include-git-info`/`-g` stamps git provenance annotations. +- Promotion from the `*-unstable` registry to the public one is a gated + step owned by the dpm/release team (`dpm repo promote-components …`), not + part of normal component development. + +### 2. Declare it in a project + +Add the component to the project's `daml.yaml` under `components:`. An entry +is one of three forms: + +```yaml +components: + # published component pulled from the configured registry + - mdx-validate:0.1.0 + # …or a full OCI reference + - oci://europe-docker.pkg.dev/da-images/public/mdx-validate:0.1.0 + # …or a local checkout, for development + - name: mdx-validate + path: ./tools/mdx-validate +``` + +> The older `override-components:` key still works but is **deprecated** in +> dpm 3.5.1 — prefer `components:`. + +### 3. Use it (and confirm it installed) + +```sh +dpm --help # mdx-validate appears under Dpm-SDK Commands +dpm mdx-validate --version # confirms the resolved binary runs +dpm mdx-validate docs-main # real run +``` + +dpm pulls and caches the resolved platform under +`~/.dpm/cache/components/mdx-validate//`. To run a published +component once without declaring it in a project: + +```sh +dpm component run mdx-validate [args] +dpm tags oci://<registry>/mdx-validate # list published versions +``` + +### 4. 
Remove it from a project + +Delete the component's entry from `daml.yaml` `components:`; it stops +appearing in `dpm`. To also drop the cached download: + +```sh +rm -rf ~/.dpm/cache/components/mdx-validate +``` + +## Exit codes + +| Code | Meaning | +|------|---------| +| 0 | Clean run (no errors; warnings allowed unless `--strict`) | +| 1 | Blocking findings reported | +| 2 | Usage error or I/O failure | + +## Adding a validator + +1. Implement `validate.Validator` in `internal/validate/.go`. +2. Register the new validator in `validate.DefaultValidators()`. +3. Add unit tests for the validator itself and (if path discovery + changes) update `runner_test.go`. +4. Run `make smoke` and the tree validation against `./docs-main` before + landing — the false-positive guard is part of the contract. diff --git a/tools/mdx-validate/cmd/mdx-validate/main.go b/tools/mdx-validate/cmd/mdx-validate/main.go new file mode 100644 index 000000000..cca147a37 --- /dev/null +++ b/tools/mdx-validate/cmd/mdx-validate/main.go @@ -0,0 +1,163 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +// Command mdx-validate validates Mintlify MDX documentation files. It is +// the dpm component complement to rst-to-mdx: where the converter emits +// MDX, mdx-validate checks that the MDX in tree is valid before it ships. +package main + +import ( + "flag" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + + "daml.com/x/dpm-components/mdx-validate/internal/validate" +) + +const version = "0.5.0-dev" + +func main() { + os.Exit(run(os.Stdout, os.Stderr, os.Args[1:])) +} + +// run is the testable entry point. It returns the process exit code. 
+// +// Exit codes: +// +// 0 — clean run (no errors, or only warnings without --strict) +// 1 — blocking findings reported +// 2 — usage error or I/O failure +func run(stdout, stderr io.Writer, args []string) int { + fs := flag.NewFlagSet("mdx-validate", flag.ContinueOnError) + fs.SetOutput(stderr) + fs.Usage = func() { usage(stderr, fs) } + + strict := fs.Bool("strict", false, "promote warnings to errors") + staged := fs.Bool("staged", false, "validate only files staged in the git index (pre-commit mode)") + showVersion := fs.Bool("version", false, "print version and exit") + + if err := fs.Parse(args); err != nil { + if err == flag.ErrHelp { + return 0 + } + return 2 + } + + if *showVersion { + fmt.Fprintln(stdout, "mdx-validate", version) + return 0 + } + + paths := fs.Args() + if *staged && len(paths) > 0 { + fmt.Fprintln(stderr, "mdx-validate: --staged cannot be combined with explicit paths") + return 2 + } + + targets, err := resolveTargets(*staged, paths) + if err != nil { + fmt.Fprintf(stderr, "mdx-validate: %v\n", err) + return 2 + } + + r := validate.Runner{Validators: validate.DefaultValidators()} + findings, counts, err := r.RunPaths(targets) + if err != nil { + fmt.Fprintf(stderr, "mdx-validate: %v\n", err) + return 2 + } + + // Message when nothing matched — covers both `--staged` with + // no staged .mdx and explicit paths that resolved to non-mdx files. + if counts.Files == 0 { + fmt.Fprintln(stdout, "no .mdx files to validate") + return 0 + } + + validate.FormatFindings(stdout, findings) + fmt.Fprintf(stdout, "\n%d error(s), %d warning(s) across %d file(s)\n", + counts.Errors, counts.Warnings, counts.Files) + + if counts.HasBlockingErrors(*strict) { + return 1 + } + return 0 +} + +// resolveTargets picks the set of paths to validate based on the flag +// choices: --staged → git's staged file list; explicit args → those paths +// as-given; neither → ./docs-main as the default. 
+// +// When the default ./docs-main is selected and that directory does not +// exist, returns an error so the user gets a clear hint rather than a +// confusing zero-files report from the runner. +func resolveTargets(staged bool, paths []string) ([]string, error) { + switch { + case staged: + return stagedMDXFiles() + case len(paths) > 0: + return paths, nil + default: + const defaultDir = "./docs-main" + if _, err := os.Stat(defaultDir); err != nil { + return nil, fmt.Errorf("%s not found in cwd; run from the repo root or pass an explicit path", defaultDir) + } + return []string{defaultDir}, nil + } +} + +// stagedMDXFiles returns the .mdx files in git's staged index. Run from +// anywhere inside the repository. +// +// --diff-filter=ACMR includes Added, Copied, Modified, Renamed entries; a +// pure delete (D) is not validated because there's no content to check. +// +// git reports staged paths relative to the repository root, but the runner +// opens them relative to the current working directory. To keep --staged +// correct when invoked from a subdirectory (e.g. a hook that does not cd to +// the root), each path is joined to the repo root reported by git. 
func stagedMDXFiles() ([]string, error) {
	root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return nil, fmt.Errorf("git repo root: %w", err)
	}
	rootDir := strings.TrimRight(string(root), "\n")

	// -z makes git emit NUL-terminated, unquoted paths. Without it, names
	// containing non-ASCII bytes, quotes, backslashes or control characters
	// are printed C-quoted ("docs/caf\303\251.mdx"); the quoted form ends in
	// `"`, fails the .mdx suffix test, and the file is silently skipped.
	out, err := exec.Command("git", "diff", "--cached", "--name-only", "-z", "--diff-filter=ACMR").Output()
	if err != nil {
		return nil, fmt.Errorf("git staged file list: %w", err)
	}
	return stagedMDXPaths(rootDir, string(out)), nil
}

// stagedMDXPaths turns the NUL-separated path list printed by
// `git diff --name-only -z` into the .mdx entries, each joined onto rootDir.
// Empty fields (such as the one after the final NUL) and non-.mdx entries
// are dropped. The result is nil when nothing matches.
func stagedMDXPaths(rootDir, nulList string) []string {
	var mdx []string
	for _, name := range strings.Split(nulList, "\x00") {
		if !strings.HasSuffix(name, ".mdx") {
			continue
		}
		mdx = append(mdx, filepath.Join(rootDir, name))
	}
	return mdx
}

// usage writes the command's help text to w. fs is accepted so the function
// can be installed as the flag set's Usage hook; the text itself is fixed.
func usage(w io.Writer, fs *flag.FlagSet) {
	fmt.Fprintln(w, "Usage: dpm mdx-validate [flags] [paths...]")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "Validates Mintlify MDX documentation files. With no paths, validates ./docs-main.")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "Flags:")
	fmt.Fprintln(w, " --staged validate only files staged in the git index (pre-commit mode)")
	fmt.Fprintln(w, " --strict promote warnings to errors")
	fmt.Fprintln(w, " --version print version and exit")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "Examples:")
	fmt.Fprintln(w, " dpm mdx-validate # validate ./docs-main")
	fmt.Fprintln(w, " dpm mdx-validate docs-main/foo.mdx # validate one file")
	fmt.Fprintln(w, " dpm mdx-validate --staged # validate staged .mdx files (pre-commit)")
	fmt.Fprintln(w, " dpm mdx-validate --strict ./docs-main # warnings become errors")
}

// Patch boundary, kept verbatim from the flattened patch; the next file starts here:
// diff --git a/tools/mdx-validate/cmd/mdx-validate/main_test.go b/tools/mdx-validate/cmd/mdx-validate/main_test.go
// new file mode 100644
// index 000000000..90524ee82
// --- /dev/null
// +++ b/tools/mdx-validate/cmd/mdx-validate/main_test.go
// @@ -0,0 +1,142 @@
// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRun_ValidFileExitsZero(t *testing.T) { + path := writeFile(t, "good.mdx", "---\ntitle: T\n---\n\nbody\n") + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{path}); code != 0 { + t.Errorf("exit=%d stderr=%q", code, stderr.String()) + } +} + +func TestRun_MissingTitleExitsOne(t *testing.T) { + path := writeFile(t, "bad.mdx", "---\ndescription: nope\n---\n\nbody\n") + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{path}); code != 1 { + t.Errorf("exit=%d stdout=%q stderr=%q", code, stdout.String(), stderr.String()) + } + if !strings.Contains(stdout.String(), "frontmatter-missing-title") { + t.Errorf("stdout should mention finding code; got %q", stdout.String()) + } +} + +func TestRun_NoFrontmatterExitsOne(t *testing.T) { + path := writeFile(t, "naked.mdx", "# Page\n\nbody\n") + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{path}); code != 1 { + t.Errorf("exit=%d", code) + } + if !strings.Contains(stdout.String(), "frontmatter-missing") { + t.Errorf("expected frontmatter-missing in stdout, got %q", stdout.String()) + } +} + +func TestRun_NonMDXFileIsDropped(t *testing.T) { + path := writeFile(t, "notes.txt", "not an MDX file\n") + var stdout, stderr bytes.Buffer + code := run(&stdout, &stderr, []string{path}) + if code != 0 { + t.Errorf("exit=%d, want 0 (non-mdx files are silently dropped)", code) + } + if !strings.Contains(stdout.String(), "no .mdx files") { + t.Errorf("expected 'no .mdx files' message, got %q", stdout.String()) + } +} + +func TestRun_StrictPromotesWarnings(t *testing.T) { + // v0.1 has no warning-emitting validators, so this case is exercised + // by Counts.HasBlockingErrors directly in runner_test.go. 
When a + // warning-emitting validator lands, expand this to invoke run() with + // a fixture that produces only warnings, both with and without + // --strict, and assert exit codes 0 vs 1. + t.Skip("no warning-emitting validators registered yet") +} + +func TestRun_Version(t *testing.T) { + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{"--version"}); code != 0 { + t.Errorf("exit=%d", code) + } + if !strings.Contains(stdout.String(), "mdx-validate") { + t.Errorf("stdout should mention tool name, got %q", stdout.String()) + } +} + +func TestRun_HelpExitsZero(t *testing.T) { + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{"-h"}); code != 0 { + t.Errorf("exit=%d for -h, want 0; stderr=%q", code, stderr.String()) + } +} + +func TestRun_StagedRejectsExplicitPaths(t *testing.T) { + var stdout, stderr bytes.Buffer + code := run(&stdout, &stderr, []string{"--staged", "foo.mdx"}) + if code != 2 { + t.Errorf("exit=%d, want 2 (usage error)", code) + } + if !strings.Contains(stderr.String(), "--staged cannot be combined with explicit paths") { + t.Errorf("expected mutual-exclusion message, got %q", stderr.String()) + } +} + +func TestRun_UnknownFlagIsUsageError(t *testing.T) { + var stdout, stderr bytes.Buffer + if code := run(&stdout, &stderr, []string{"--no-such-flag"}); code != 2 { + t.Errorf("exit=%d, want 2", code) + } +} + +func TestRun_DefaultDirMissingHintsRepoRoot(t *testing.T) { + // Run inside an empty tmp dir so ./docs-main does not exist. + withCwd(t, t.TempDir(), func() { + var stdout, stderr bytes.Buffer + code := run(&stdout, &stderr, nil) + if code != 2 { + t.Errorf("exit=%d, want 2 when default dir is missing", code) + } + if !strings.Contains(stderr.String(), "run from the repo root") { + t.Errorf("stderr should hint repo-root, got %q", stderr.String()) + } + }) +} + +// writeFile creates a file in t.TempDir() with the given name + content +// and returns its path. 
Cleaned up automatically by t.TempDir. +func writeFile(t *testing.T, name, content string) string { + t.Helper() + path := filepath.Join(t.TempDir(), name) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + return path +} + +// withCwd switches working directory for the duration of fn and restores +// the original on return. +func withCwd(t *testing.T, dir string, fn func()) { + t.Helper() + orig, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + if err := os.Chdir(dir); err != nil { + t.Fatal(err) + } + defer func() { + if err := os.Chdir(orig); err != nil { + t.Fatal(err) + } + }() + fn() +} diff --git a/tools/mdx-validate/component.windows.yaml b/tools/mdx-validate/component.windows.yaml new file mode 100644 index 000000000..4c28a31ef --- /dev/null +++ b/tools/mdx-validate/component.windows.yaml @@ -0,0 +1,13 @@ +# Windows-specific manifest. The DPM component model expects per-platform +# manifests when the binary suffix differs; on Windows the binary is named +# mdx-validate.exe. +# +# $schema: https://raw.githubusercontent.com/DACH-NY/dpm/refs/heads/json-schema/schema/component.schema.json +apiVersion: digitalasset.com/v1 +kind: Component +spec: + commands: + - path: ./mdx-validate.exe + name: mdx-validate + desc: Validate Mintlify MDX documentation files. + aliases: [] diff --git a/tools/mdx-validate/component.yaml b/tools/mdx-validate/component.yaml new file mode 100644 index 000000000..981ea0025 --- /dev/null +++ b/tools/mdx-validate/component.yaml @@ -0,0 +1,12 @@ +# $schema: https://raw.githubusercontent.com/DACH-NY/dpm/refs/heads/json-schema/schema/component.schema.json +# DPM component manifest for the mdx-validate validator. +# See tools/mdx-validate/README.md for development and publishing notes. + +apiVersion: digitalasset.com/v1 +kind: Component +spec: + commands: + - path: ./mdx-validate + name: mdx-validate + desc: Validate Mintlify MDX documentation files. 
+ aliases: [] diff --git a/tools/mdx-validate/daml.yaml b/tools/mdx-validate/daml.yaml new file mode 100644 index 000000000..c4c003afc --- /dev/null +++ b/tools/mdx-validate/daml.yaml @@ -0,0 +1,6 @@ +# Local-dev override so `dpm --help` discovers the mdx-validate command +# when invoked from this directory. See the "Publishing Components" docs +# in dpm/docs-internal/src/components/ for the full mechanism. +override-components: + mdx-validate: + local-path: . diff --git a/tools/mdx-validate/go.mod b/tools/mdx-validate/go.mod new file mode 100644 index 000000000..af19e6ce5 --- /dev/null +++ b/tools/mdx-validate/go.mod @@ -0,0 +1,3 @@ +module daml.com/x/dpm-components/mdx-validate + +go 1.21 diff --git a/tools/mdx-validate/internal/catalog/catalog.go b/tools/mdx-validate/internal/catalog/catalog.go new file mode 100644 index 000000000..e4dfd2194 --- /dev/null +++ b/tools/mdx-validate/internal/catalog/catalog.go @@ -0,0 +1,288 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +// Package catalog is the hand-curated catalog of Mintlify MDX components +// that this docs site uses, plus the prop specs that downstream tools need to +// emit or validate component usage. +// +// Source of truth: https://www.mintlify.com/docs/components/index +// +// Both rst-to-mdx (when emitting MDX from RST) and mdx-validate (when checking +// existing MDX) consult this catalog. Add entries when you encounter a new +// Mintlify component in docs-main/. +package catalog + +import "sort" + +// PropSpec describes a single component prop. +type PropSpec struct { + Name string + Required bool + EnumValues []string // non-nil for enum-like props with a fixed value set +} + +// Component is the spec for one Mintlify MDX component. +type Component struct { + Name string + Description string + Props []PropSpec + AllowsChildren bool + DocsURL string +} + +// RequiredProps returns the names of props that must be present.
+func (c Component) RequiredProps() []string { + var out []string + for _, p := range c.Props { + if p.Required { + out = append(out, p.Name) + } + } + return out +} + +// PropByName returns the spec for a named prop. Second return is false if the +// component does not declare that prop. +func (c Component) PropByName(name string) (PropSpec, bool) { + for _, p := range c.Props { + if p.Name == name { + return p, true + } + } + return PropSpec{}, false +} + +// Lookup returns the component spec for the given JSX tag name. +// The boolean is false for unknown components. +func Lookup(name string) (Component, bool) { + c, ok := catalog[name] + return c, ok +} + +// All returns all known component names in alphabetical order. Useful +// for surfacing a deterministic list in validator error messages. +func All() []string { + out := make([]string, 0, len(catalog)) + for name := range catalog { + out = append(out, name) + } + sort.Strings(out) + return out +} + +// catalog is the in-memory registry. Keep entries alphabetical by component +// name to make diffs readable. 
+var catalog = map[string]Component{ + "Accordion": { + Name: "Accordion", + Description: "Collapsible disclosure with a title and hidden body content.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/accordions", + Props: []PropSpec{ + {Name: "title", Required: true}, + {Name: "description"}, + {Name: "defaultOpen"}, + {Name: "icon"}, + {Name: "iconType"}, + }, + }, + "AccordionGroup": { + Name: "AccordionGroup", + Description: "Container that groups related Accordion components.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/accordions", + }, + "Card": { + Name: "Card", + Description: "Linked or static content card with title, optional icon, and child body.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/cards", + Props: []PropSpec{ + {Name: "title", Required: true}, + {Name: "icon"}, + {Name: "iconType"}, + {Name: "color"}, + {Name: "href"}, + {Name: "horizontal"}, + {Name: "arrow"}, + {Name: "cta"}, + {Name: "img"}, + }, + }, + "CardGroup": { + Name: "CardGroup", + Description: "Grid container that lays out Card components in N columns.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/card-group", + Props: []PropSpec{ + {Name: "cols"}, + }, + }, + "Check": { + Name: "Check", + Description: "Green checkmark callout for confirmations.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/callouts", + }, + "CodeGroup": { + Name: "CodeGroup", + Description: "Tabbed grouping of consecutive fenced code blocks.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/code-group", + }, + "Columns": { + Name: "Columns", + Description: "Multi-column layout container.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/columns", + Props: []PropSpec{ + {Name: "cols"}, + }, + }, + "Expandable": { + Name: "Expandable", + Description: "Collapsible block, typically used inside 
ResponseField for nested schemas.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/expandable", + Props: []PropSpec{ + {Name: "title"}, + {Name: "defaultOpen"}, + }, + }, + "Frame": { + Name: "Frame", + Description: "Bordered container for images or media, with optional caption.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/frames", + Props: []PropSpec{ + {Name: "caption"}, + }, + }, + "Icon": { + Name: "Icon", + Description: "Inline icon by name (FontAwesome, Lucide, etc.).", + DocsURL: "https://www.mintlify.com/docs/components/icons", + Props: []PropSpec{ + {Name: "icon", Required: true}, + {Name: "color"}, + {Name: "size"}, + {Name: "iconType", EnumValues: []string{"regular", "solid", "light", "thin", "sharp-solid", "duotone", "brands"}}, + }, + }, + "Info": { + Name: "Info", + Description: "Blue informational callout.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/callouts", + }, + "Note": { + Name: "Note", + Description: "Neutral callout for sidebar context.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/callouts", + }, + "ParamField": { + Name: "ParamField", + Description: "API request parameter declaration. 
Used in API reference pages.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/api-playground/params", + Props: []PropSpec{ + {Name: "path"}, + {Name: "query"}, + {Name: "body"}, + {Name: "header"}, + {Name: "type"}, + {Name: "required"}, + {Name: "default"}, + {Name: "placeholder"}, + }, + }, + "RequestExample": { + Name: "RequestExample", + Description: "Container for example API request snippets.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/api-playground/migrating", + }, + "ResponseExample": { + Name: "ResponseExample", + Description: "Container for example API response snippets.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/api-playground/migrating", + }, + "ResponseField": { + Name: "ResponseField", + Description: "API response field declaration.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/api-playground/params", + Props: []PropSpec{ + {Name: "name", Required: true}, + {Name: "type"}, + {Name: "required"}, + {Name: "default"}, + }, + }, + "Step": { + Name: "Step", + Description: "Single step in a Steps sequence.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/steps", + Props: []PropSpec{ + {Name: "title", Required: true}, + {Name: "icon"}, + {Name: "iconType"}, + {Name: "stepNumber"}, + }, + }, + "Steps": { + Name: "Steps", + Description: "Numbered procedural sequence wrapping Step components.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/steps", + }, + "Tab": { + Name: "Tab", + Description: "Single tab inside a Tabs container.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/tabs", + Props: []PropSpec{ + {Name: "title", Required: true}, + }, + }, + "Tabs": { + Name: "Tabs", + Description: "Tabbed container for grouped Tab components.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/tabs", + }, + "Tip": { + Name: "Tip", + Description: "Green callout 
for helpful suggestions.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/callouts", + }, + "Tooltip": { + Name: "Tooltip", + Description: "Inline hover tooltip.", + DocsURL: "https://www.mintlify.com/docs/components/tooltips", + Props: []PropSpec{ + {Name: "tip", Required: true}, + }, + }, + "Update": { + Name: "Update", + Description: "Changelog/update entry block.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/update", + Props: []PropSpec{ + {Name: "label", Required: true}, + {Name: "description", Required: true}, + {Name: "tags"}, + }, + }, + "Warning": { + Name: "Warning", + Description: "Yellow/red callout for cautions.", + AllowsChildren: true, + DocsURL: "https://www.mintlify.com/docs/components/callouts", + }, +} diff --git a/tools/mdx-validate/internal/catalog/catalog_test.go b/tools/mdx-validate/internal/catalog/catalog_test.go new file mode 100644 index 000000000..451f1abe9 --- /dev/null +++ b/tools/mdx-validate/internal/catalog/catalog_test.go @@ -0,0 +1,95 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. 
+// SPDX-License-Identifier: Apache-2.0 + +package catalog + +import ( + "sort" + "strings" + "testing" +) + +func TestLookupKnownComponents(t *testing.T) { + for _, name := range []string{ + "Accordion", "Card", "CardGroup", "CodeGroup", "Frame", + "Icon", "Note", "Step", "Steps", "Tab", "Tabs", "Tip", "Warning", + } { + c, ok := Lookup(name) + if !ok { + t.Errorf("expected component %q to be in catalog", name) + continue + } + if c.Name != name { + t.Errorf("Lookup(%q).Name = %q, want %q", name, c.Name, name) + } + if c.DocsURL == "" { + t.Errorf("component %q is missing DocsURL", name) + } + } +} + +func TestLookupUnknownReturnsFalse(t *testing.T) { + if _, ok := Lookup("DefinitelyNotAMintlifyComponent"); ok { + t.Error("Lookup of unknown component returned ok=true") + } +} + +func TestRequiredPropsCoverKnownCases(t *testing.T) { + cases := map[string][]string{ + "Card": {"title"}, + "Tab": {"title"}, + "Step": {"title"}, + "Accordion": {"title"}, + "Icon": {"icon"}, + "Tooltip": {"tip"}, + "Update": {"label", "description"}, + } + for name, want := range cases { + c, ok := Lookup(name) + if !ok { + t.Fatalf("missing component %q", name) + } + got := c.RequiredProps() + sort.Strings(got) + sort.Strings(want) + if strings.Join(got, ",") != strings.Join(want, ",") { + t.Errorf("%s required props: got %v, want %v", name, got, want) + } + } +} + +func TestCalloutsHaveNoRequiredProps(t *testing.T) { + for _, name := range []string{"Note", "Tip", "Warning", "Info", "Check"} { + c, ok := Lookup(name) + if !ok { + t.Fatalf("missing callout %q", name) + } + if req := c.RequiredProps(); len(req) != 0 { + t.Errorf("%s should have no required props, got %v", name, req) + } + if !c.AllowsChildren { + t.Errorf("%s should allow children", name) + } + } +} + +func TestPropByNameFindsDeclaredProp(t *testing.T) { + card, _ := Lookup("Card") + p, ok := card.PropByName("href") + if !ok { + t.Fatal("Card.href should exist in catalog") + } + if p.Required { + t.Error("Card.href 
// Overview: this is a code-fence-aware JSX element scanner. It finds
// capitalized-name JSX elements that appear *outside* of fenced code blocks
// and inline code spans, so the "<HOST:PORT>-in-a-fence" class of false
// positive never reaches a validator.
//
// Masking (what is treated as code and skipped) is exactly two things: fenced
// code blocks (``` or ~~~) and inline code spans (runs of backticks). Nothing
// else — indented code, HTML comments, and {…} expression blocks are not
// masked. Masking preserves byte and line positions (masked characters become
// spaces, newlines are kept) so reported line numbers stay accurate.

// Kind classifies a JSX tag.
type Kind int

const (
	// Open is an opening tag, e.g. <Card>.
	Open Kind = iota
	// Close is a closing tag, e.g. </Card>.
	Close
	// SelfClose is a self-closing tag, e.g. <Icon />.
	SelfClose
)

// Attr is a single parsed attribute of an Open/SelfClose element.
type Attr struct {
	Name   string
	Value  string // unquoted literal value; "" for boolean/expression attrs
	IsExpr bool   // true when the value is a {…} expression (skip literal checks)
}

// Element is a JSX element discovered outside of code.
type Element struct {
	Name  string // tag name, e.g. "Accordion"
	Kind  Kind
	Attrs []Attr // empty for Close
	Line  int    // 1-based line of the opening '<'
}

// Elements returns every capitalized-name JSX element found outside of fenced
// code blocks and inline code spans, in source order.
func Elements(content []byte) []Element {
	return scan(mask(content))
}

// isCapital reports whether b is an ASCII uppercase letter.
func isCapital(b byte) bool { return b >= 'A' && b <= 'Z' }

// isNameByte reports whether b can appear in a JSX tag name after the first
// character (i.e. matches [A-Za-z0-9]).
func isNameByte(b byte) bool {
	return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
}

// mask returns a copy of content with all fenced-code and inline-code bytes
// replaced by spaces, preserving newlines (and thus all line and byte offsets).
func mask(content []byte) []byte {
	out := make([]byte, len(content))
	copy(out, content)

	// Phase 1: fenced code blocks, line by line.
	lines := splitLines(content)
	var (
		inFence   bool
		fenceChar byte
		fenceLen  int
	)
	for _, ln := range lines {
		if !inFence {
			if fc, flen := fenceMarker(content, ln.start, ln.end); fc != 0 {
				// Opening fence: mask the fence line, enter fence.
				maskRange(out, content, ln.start, ln.end)
				inFence, fenceChar, fenceLen = true, fc, flen
			}
			continue
		}
		// Inside a fence: mask everything up to and including the closing fence.
		maskRange(out, content, ln.start, ln.end)
		if isClosingFence(content, ln.start, ln.end, fenceChar, fenceLen) {
			inFence, fenceChar, fenceLen = false, 0, 0
		}
	}

	// Phase 2: inline code spans, over the fence-masked output, per line.
	// Bytes inside fences are already spaces, so they cannot be mistaken for
	// span delimiters.
	for _, ln := range lines {
		maskInlineSpans(out, ln.start, ln.end)
	}

	return out
}

// lineSpan is a half-open byte range [start, end) for a single line, excluding
// the trailing newline.
type lineSpan struct {
	start int
	end   int
}

// splitLines splits content into line spans (newline excluded from each span).
// The final span covers whatever follows the last newline, which may be empty.
func splitLines(content []byte) []lineSpan {
	var lines []lineSpan
	start := 0
	for i := 0; i < len(content); i++ {
		if content[i] == '\n' {
			lines = append(lines, lineSpan{start: start, end: i})
			start = i + 1
		}
	}
	lines = append(lines, lineSpan{start: start, end: len(content)})
	return lines
}

// fenceMarker examines the line [start, end) and, if it can open a fence,
// returns the fence character ('`' or '~') and the length of the marker run.
// Otherwise it returns (0, 0). A fence line has ≤3 leading spaces followed by a
// run of ≥3 of the same fence character; the rest of the line is the info
// string. For backtick fences the info string must not itself contain a
// backtick — such a line (e.g. "```x``` text") is an inline code span, not a
// fence, and treating it as one would mask everything that follows.
func fenceMarker(content []byte, start, end int) (byte, int) {
	i := start
	for i < end && i-start < 4 && content[i] == ' ' {
		i++
	}
	if i-start > 3 || i >= end {
		return 0, 0
	}
	ch := content[i]
	if ch != '`' && ch != '~' {
		return 0, 0
	}
	runStart := i
	for i < end && content[i] == ch {
		i++
	}
	run := i - runStart
	if run < 3 {
		return 0, 0
	}
	if ch == '`' {
		for k := i; k < end; k++ {
			if content[k] == '`' {
				return 0, 0
			}
		}
	}
	return ch, run
}

// isClosingFence reports whether the line [start, end) closes a fence opened
// with fenceLen repetitions of fenceChar: ≤3 leading spaces, a run of at least
// fenceLen fence characters, and nothing but whitespace after it. A line such
// as "```js" inside an open fence is therefore content, not a closer.
func isClosingFence(content []byte, start, end int, fenceChar byte, fenceLen int) bool {
	i := start
	for i < end && i-start < 4 && content[i] == ' ' {
		i++
	}
	if i-start > 3 {
		return false
	}
	runStart := i
	for i < end && content[i] == fenceChar {
		i++
	}
	if i-runStart < fenceLen {
		return false
	}
	for ; i < end; i++ {
		// '\r' is tolerated so CRLF files close their fences too.
		if c := content[i]; c != ' ' && c != '\t' && c != '\r' {
			return false
		}
	}
	return true
}

// maskRange replaces out[start:end) with spaces, but leaves bytes that are
// newlines untouched (there are none within a line span, but this keeps the
// helper safe).
func maskRange(out, content []byte, start, end int) {
	for i := start; i < end; i++ {
		if content[i] != '\n' {
			out[i] = ' '
		}
	}
}

// maskInlineSpans masks inline code spans within the line span [start, end) of
// out. A run of N backticks opens a span that closes at the next run of exactly
// N backticks on the same line. An opening run with no matching closer is
// literal text; scanning then resumes right after that run so later spans on
// the same line are still recognised.
func maskInlineSpans(out []byte, start, end int) {
	i := start
	for i < end {
		if out[i] != '`' {
			i++
			continue
		}
		// Measure the opening run.
		openStart := i
		for i < end && out[i] == '`' {
			i++
		}
		n := i - openStart

		// Search for a closing run of exactly n backticks.
		closeEnd := -1
		for j := i; j < end; {
			if out[j] != '`' {
				j++
				continue
			}
			runStart := j
			for j < end && out[j] == '`' {
				j++
			}
			if j-runStart == n {
				closeEnd = j
				break
			}
			// A run of a different length is span content; keep looking.
		}
		if closeEnd < 0 {
			// Unmatched opener: leave it as text and continue from i, which
			// already sits just past the opening run.
			continue
		}
		// Mask from the opening run through the closing run inclusive.
		for k := openStart; k < closeEnd; k++ {
			out[k] = ' '
		}
		i = closeEnd
	}
}

// scan extracts JSX elements from the (already masked) content. Only tags
// whose name starts with an ASCII capital are reported; lowercase HTML tags
// and fragments are skipped.
func scan(content []byte) []Element {
	var elements []Element
	line := 1
	for i := 0; i < len(content); i++ {
		c := content[i]
		if c == '\n' {
			line++
			continue
		}
		if c != '<' {
			continue
		}
		// Possible tag start. Look at the following bytes.
		j := i + 1
		isClose := false
		if j < len(content) && content[j] == '/' {
			isClose = true
			j++
		}
		if j >= len(content) || !isCapital(content[j]) {
			// Not a capitalized tag (lowercase, '<>', '</>' etc.). Skip.
			continue
		}
		// Read the name.
		nameStart := j
		for j < len(content) && isNameByte(content[j]) {
			j++
		}
		name := string(content[nameStart:j])

		// Scan to the terminating '>', respecting "…", '…', and {…}.
		body, endIdx, bodyNewlines := scanTagBody(content, j)
		if endIdx < 0 {
			// Unterminated tag: the body scan consumed the rest of the input,
			// so there is nothing left to report.
			break
		}

		el := Element{Name: name, Line: line}
		if isClose {
			el.Kind = Close
		} else {
			el.Kind = Open
			trimmed := body
			// A '/' as the last non-blank byte before '>' marks self-close.
			for k := len(trimmed) - 1; k >= 0; k-- {
				if isSpace(trimmed[k]) {
					continue
				}
				if trimmed[k] == '/' {
					el.Kind = SelfClose
					trimmed = trimmed[:k]
				}
				break
			}
			el.Attrs = parseAttrs(trimmed)
		}

		elements = append(elements, el)
		line += bodyNewlines
		i = endIdx // loop's i++ moves past '>'
	}
	return elements
}

// scanTagBody scans from index start (just past the tag name) to the
// terminating '>', respecting quoted strings and {…} expressions. It returns
// the body bytes (between the name and the '>'), the index of the terminating
// '>', and the number of newlines consumed. If unterminated, endIdx is -1.
func scanTagBody(content []byte, start int) (body []byte, endIdx int, newlines int) {
	var (
		inDouble   bool
		inSingle   bool
		braceDepth int
	)
	for i := start; i < len(content); i++ {
		c := content[i]
		switch {
		case c == '\n':
			// Counted first so newlines inside quotes/braces are not missed.
			newlines++
		case inDouble:
			if c == '"' {
				inDouble = false
			}
		case inSingle:
			if c == '\'' {
				inSingle = false
			}
		case braceDepth > 0:
			switch c {
			case '{':
				braceDepth++
			case '}':
				braceDepth--
			}
		case c == '"':
			inDouble = true
		case c == '\'':
			inSingle = true
		case c == '{':
			braceDepth++
		case c == '>':
			return content[start:i], i, newlines
		}
	}
	return nil, -1, newlines
}

// parseAttrs parses the attribute portion of a tag body into Attrs. It accepts
// boolean attributes (name only), quoted values ("…" or '…'), {…} expression
// values (recorded with IsExpr and no Value), and unquoted values that run to
// the next whitespace. Bytes that cannot start an attribute name are skipped.
func parseAttrs(body []byte) []Attr {
	var attrs []Attr
	i := 0
	n := len(body)
	for i < n {
		// Skip whitespace.
		for i < n && isSpace(body[i]) {
			i++
		}
		if i >= n {
			break
		}
		if !isAttrNameStart(body[i]) {
			i++
			continue
		}
		nameStart := i
		i++
		for i < n && isAttrNameByte(body[i]) {
			i++
		}
		name := string(body[nameStart:i])

		// Skip whitespace before a possible '='.
		k := i
		for k < n && isSpace(body[k]) {
			k++
		}
		if k >= n || body[k] != '=' {
			// Boolean attribute.
			attrs = append(attrs, Attr{Name: name})
			i = k
			continue
		}
		// Consume '=' and following whitespace.
		k++
		for k < n && isSpace(body[k]) {
			k++
		}
		if k >= n {
			// "name=" at the very end: record it without a value.
			attrs = append(attrs, Attr{Name: name})
			i = k
			continue
		}
		switch body[k] {
		case '"', '\'':
			quote := body[k]
			k++
			valStart := k
			for k < n && body[k] != quote {
				k++
			}
			attrs = append(attrs, Attr{Name: name, Value: string(body[valStart:k])})
			if k < n {
				k++ // past closing quote
			}
		case '{':
			// Skip the balanced {…}; the expression text itself is not kept.
			depth := 0
			for k < n {
				c := body[k]
				k++
				if c == '{' {
					depth++
				} else if c == '}' {
					depth--
					if depth == 0 {
						break
					}
				}
			}
			attrs = append(attrs, Attr{Name: name, IsExpr: true})
		default:
			// Unquoted value, read until whitespace.
			valStart := k
			for k < n && !isSpace(body[k]) {
				k++
			}
			attrs = append(attrs, Attr{Name: name, Value: string(body[valStart:k])})
		}
		i = k
	}
	return attrs
}

// isSpace reports whether b is a space, tab, newline, or carriage return.
func isSpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}

// isAttrNameStart reports whether b may begin an attribute name ([A-Za-z_]).
func isAttrNameStart(b byte) bool {
	return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z') || b == '_'
}

// isAttrNameByte reports whether b may continue an attribute name
// ([A-Za-z0-9_:-]).
func isAttrNameByte(b byte) bool {
	return isAttrNameStart(b) || (b >= '0' && b <= '9') || b == '-' || b == ':'
}
+// SPDX-License-Identifier: Apache-2.0 + +package mdxscan + +import ( + "reflect" + "testing" +) + +func TestElements(t *testing.T) { + tests := []struct { + name string + in string + want []Element + }{ + { + name: "open with attr and matching close", + in: "\ntext\n\n", + want: []Element{ + {Name: "Accordion", Kind: Open, Attrs: []Attr{{Name: "title", Value: "x"}}, Line: 1}, + {Name: "Accordion", Kind: Close, Line: 3}, + }, + }, + { + name: "self closing icon", + in: "\n", + want: []Element{ + {Name: "Icon", Kind: SelfClose, Attrs: []Attr{{Name: "icon", Value: "home"}}, Line: 1}, + }, + }, + { + name: "tag inside backtick fence ignored", + in: "before\n```\n\n```\nafter\n", + want: nil, + }, + { + name: "HOST:PORT in bash fence returns nothing", + in: "Run it:\n```bash\ncurl http:///api\n```\ndone\n", + want: nil, + }, + { + name: "tag inside inline code span ignored", + in: "use the `` component here\n", + want: nil, + }, + { + name: "multi-line opening tag", + in: "\n\n", + want: []Element{ + {Name: "Card", Kind: Open, Attrs: []Attr{ + {Name: "title", Value: "a"}, + {Name: "icon", Value: "home"}, + }, Line: 1}, + {Name: "Card", Kind: Close, Line: 5}, + }, + }, + { + name: "gt inside attribute value does not truncate", + in: " b\">x\n", + want: []Element{ + {Name: "Card", Kind: Open, Attrs: []Attr{{Name: "title", Value: "a > b"}}, Line: 1}, + {Name: "Card", Kind: Close, Line: 1}, + }, + }, + { + name: "expression attribute", + in: "\n", + want: []Element{ + {Name: "Card", Kind: SelfClose, Attrs: []Attr{{Name: "count", IsExpr: true}}, Line: 1}, + }, + }, + { + name: "gt inside expression does not truncate", + in: " b} />\n", + want: []Element{ + {Name: "Card", Kind: SelfClose, Attrs: []Attr{{Name: "show", IsExpr: true}}, Line: 1}, + }, + }, + { + name: "lowercase and fragments ignored", + in: "
\n<>\n\n
\n", + want: nil, + }, + { + name: "boolean attribute", + in: "\n", + want: []Element{ + {Name: "Tab", Kind: Open, Attrs: []Attr{{Name: "disabled"}}, Line: 1}, + }, + }, + { + name: "single quoted value", + in: "\n", + want: []Element{ + {Name: "Icon", Kind: SelfClose, Attrs: []Attr{{Name: "icon", Value: "home"}}, Line: 1}, + }, + }, + { + name: "tilde fence masks tag", + in: "~~~\n\n~~~\n\n", + want: []Element{ + {Name: "Note", Kind: SelfClose, Line: 4}, + }, + }, + { + name: "indented fence up to three spaces", + in: " ```\n\n ```\n", + want: nil, + }, + { + name: "four space indent is not a fence", + in: " ```\n\n ```\n", + want: []Element{ + {Name: "Card", Kind: SelfClose, Line: 2}, + }, + }, + { + name: "closing fence longer than opener", + in: "```\n\n````\n\n", + want: []Element{ + {Name: "Note", Kind: SelfClose, Line: 4}, + }, + }, + { + name: "info string on opening fence", + in: "```js title=\"x\"\n\n```\n", + want: nil, + }, + { + name: "double backtick span with single inside", + in: "text `` ` more`` end \n", + want: []Element{ + {Name: "Note", Kind: SelfClose, Line: 1}, + }, + }, + { + name: "multiple elements line numbers", + in: "line1\na\nline3\nb\n", + want: []Element{ + {Name: "Tip", Kind: Open, Line: 2}, + {Name: "Tip", Kind: Close, Line: 2}, + {Name: "Warning", Kind: Open, Line: 4}, + {Name: "Warning", Kind: Close, Line: 4}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := Elements([]byte(tt.in)) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Elements() mismatch\n got: %#v\nwant: %#v", got, tt.want) + } + }) + } +} diff --git a/tools/mdx-validate/internal/validate/components.go b/tools/mdx-validate/internal/validate/components.go new file mode 100644 index 000000000..edd828700 --- /dev/null +++ b/tools/mdx-validate/internal/validate/components.go @@ -0,0 +1,116 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. 
+// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "fmt" + "strings" + + "daml.com/x/dpm-components/mdx-validate/internal/catalog" + "daml.com/x/dpm-components/mdx-validate/internal/mdxscan" +) + +// ComponentValidator checks usage of known Mintlify MDX components against the +// curated catalog. For each element whose name is in the catalog it enforces: +// +// - Missing required prop → Error "component-missing-required-prop" +// - Invalid enum value → Error "component-invalid-enum" +// - Unknown prop → Warning "component-unknown-prop" +// +// Elements whose name is NOT in the catalog produce no findings at all: they +// may be imported snippets or custom components (e.g. ), +// and flagging them would break the zero-false-positive contract. Only Open and +// SelfClose elements are inspected; Close tags carry no attributes. +// +// Out of scope (handled elsewhere or deliberately omitted): parent/child +// nesting rules, prop value type checks beyond enums, and deprecation. +type ComponentValidator struct{} + +// Name implements Validator. +func (ComponentValidator) Name() string { return "components" } + +// Validate implements Validator. +func (v ComponentValidator) Validate(path string, content []byte, p *parsed) []Finding { + var findings []Finding + local := p.localNames + + for _, el := range p.elements { + if el.Kind != mdxscan.Open && el.Kind != mdxscan.SelfClose { + continue + } + if local[el.Name] { + // Locally defined/imported component shadows the catalog name; its + // prop contract differs. See shadow.go. + continue + } + comp, ok := catalog.Lookup(el.Name) + if !ok { + // Unknown component: not ours to judge. See type doc. + continue + } + + // Index the element's attrs by name for required-prop checking. + present := make(map[string]bool, len(el.Attrs)) + for _, a := range el.Attrs { + present[a.Name] = true + } + + // Check 1: missing required props. 
+ for _, req := range comp.RequiredProps() { + if !present[req] { + findings = append(findings, Finding{ + Path: path, + Line: el.Line, + Severity: Error, + Code: "component-missing-required-prop", + Message: fmt.Sprintf( + "<%s> is missing required prop %q", comp.Name, req), + }) + } + } + + // Checks 2 & 3: per-attr enum and unknown-prop validation. + for _, a := range el.Attrs { + spec, ok := comp.PropByName(a.Name) + if !ok { + // Check 3: unknown prop. + findings = append(findings, Finding{ + Path: path, + Line: el.Line, + Severity: Warning, + Code: "component-unknown-prop", + Message: fmt.Sprintf( + "<%s> has unknown prop %q", comp.Name, a.Name), + }) + continue + } + + // Check 2: invalid enum value. Only literal values are checked; + // expression values ({…}) are opaque and skipped. + if len(spec.EnumValues) > 0 && !a.IsExpr && !contains(spec.EnumValues, a.Value) { + findings = append(findings, Finding{ + Path: path, + Line: el.Line, + Severity: Error, + Code: "component-invalid-enum", + Message: fmt.Sprintf( + "<%s> prop %q has invalid value %q; allowed values: %s", + comp.Name, a.Name, a.Value, strings.Join(spec.EnumValues, ", ")), + }) + } + } + } + + return findings +} + +// contains reports whether s is present in vals. +func contains(vals []string, s string) bool { + for _, v := range vals { + if v == s { + return true + } + } + return false +} diff --git a/tools/mdx-validate/internal/validate/components_test.go b/tools/mdx-validate/internal/validate/components_test.go new file mode 100644 index 000000000..0fa62ac92 --- /dev/null +++ b/tools/mdx-validate/internal/validate/components_test.go @@ -0,0 +1,86 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "testing" + +func TestComponentValidator(t *testing.T) { + cases := []struct { + name string + content string + wantCode string // "" means expect no findings + }{ + // Check 1: missing required prop. 
+ { + name: "accordion with required title (pass)", + content: `body`, + }, + { + name: "accordion missing required title (fail)", + content: `body`, + wantCode: "component-missing-required-prop", + }, + { + name: "card missing required title (fail)", + content: `body`, + wantCode: "component-missing-required-prop", + }, + // Check 2: invalid enum value. + { + name: "icon valid iconType enum (pass)", + content: ``, + }, + { + name: "icon invalid iconType enum (fail)", + content: ``, + wantCode: "component-invalid-enum", + }, + // Check 3: unknown prop. + { + name: "card with only known props (pass)", + content: `body`, + }, + { + name: "accordion with unknown prop (fail)", + content: `body`, + wantCode: "component-unknown-prop", + }, + // Zero-false-positive contract: unknown component is ignored entirely, + // even though it has no catalog entry and "props" we cannot validate. + { + name: "unknown component is ignored", + content: ``, + }, + // Required prop supplied as an expression counts as present. 
+ { + name: "required prop as expression counts as present", + content: `body`, + }, + } + + v := ComponentValidator{} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + content := []byte(tc.content) + got := v.Validate("test.mdx", content, newParsed(content)) + switch { + case tc.wantCode == "": + if len(got) != 0 { + t.Errorf("expected no findings, got %v", got) + } + case len(got) == 0: + t.Errorf("expected finding %q, got none", tc.wantCode) + case got[0].Code != tc.wantCode: + t.Errorf("expected finding code %q, got %q (msg: %q)", + tc.wantCode, got[0].Code, got[0].Message) + } + }) + } +} + +func TestComponentValidatorName(t *testing.T) { + if got := (ComponentValidator{}).Name(); got != "components" { + t.Errorf("Name() = %q, want %q", got, "components") + } +} diff --git a/tools/mdx-validate/internal/validate/frontmatter.go b/tools/mdx-validate/internal/validate/frontmatter.go new file mode 100644 index 000000000..e82fdafa1 --- /dev/null +++ b/tools/mdx-validate/internal/validate/frontmatter.go @@ -0,0 +1,148 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "bufio" + "bytes" + "fmt" + "regexp" + "strings" +) + +// FrontmatterValidator checks the YAML frontmatter block at the top of an +// MDX file. v0.1 enforces: +// +// - The file must open with `---` and have a closing `---`. +// - The block must contain a non-empty `title:` value. +// +// Other Mintlify-known keys (description, sidebarTitle, icon, …) are not +// required for v0.1 and may be added when a real docs-main case demands it. +type FrontmatterValidator struct{} + +// Name implements Validator. +func (FrontmatterValidator) Name() string { return "frontmatter" } + +// reTitleLine matches a `title:` line at the start of a line. Captured +// group is the right-hand side as written, before quote stripping. 
+// +// `[ \t]*` (not `\s*`) keeps the match anchored to a single line — `\s` +// would eat across `\n` and let an empty `title:` line silently absorb +// the next line's value. +var reTitleLine = regexp.MustCompile(`(?m)^title:[ \t]*(.*?)[ \t]*$`) + +// Validate implements Validator. +// +// Known regex-parsing limitations (acceptable for v0.1; widen scope only +// once a real false positive is observed in docs-main): +// - Block scalars whose body contains a `title:` line (e.g. `description: |` +// followed by an indented `title:`) can match the inner string. +// - Duplicate `title:` keys (which YAML rejects as invalid) are accepted; +// the first match wins. +// +// If/when these limitations bite real pages, switch to a real YAML parser +// (likely gopkg.in/yaml.v3) — the interface and tests stay the same. +func (v FrontmatterValidator) Validate(path string, content []byte, _ *parsed) []Finding { + block, err := extractFrontmatterBlock(content) + if err != nil { + return []Finding{{ + Path: path, + Line: 1, + Severity: Error, + Code: "frontmatter-unreadable", + Message: fmt.Sprintf("failed to scan frontmatter: %v", err), + }} + } + if block == nil { + return []Finding{{ + Path: path, + Line: 1, + Severity: Error, + Code: "frontmatter-missing", + Message: "MDX file has no YAML frontmatter (expected leading `---` block)", + }} + } + + m := reTitleLine.FindSubmatch(block.body) + if m == nil { + return []Finding{{ + Path: path, + Line: block.startLine, + Severity: Error, + Code: "frontmatter-missing-title", + Message: "frontmatter must declare a `title:` field", + }} + } + + if isEmptyTitle(string(m[1])) { + return []Finding{{ + Path: path, + Line: block.startLine, + Severity: Error, + Code: "frontmatter-empty-title", + Message: "frontmatter `title:` is empty", + }} + } + + return nil +} + +// frontmatterBlock is the slice of bytes between (and excluding) the two +// `---` delimiter lines, plus the 1-based line number of the opening +// delimiter for diagnostic 
// attribution.
type frontmatterBlock struct {
	body      []byte // lines between the delimiters, each re-terminated with '\n'
	startLine int    // line of the opening `---`; always 1 as constructed below
}

// extractFrontmatterBlock returns the YAML body between the leading and
// trailing `---` markers.
//
// A UTF-8 byte-order mark in front of the opening delimiter is skipped, so a
// file saved "with BOM" is not misreported as having no frontmatter.
//
// Returns:
//   - (*block, nil) when a complete frontmatter block was found.
//   - (nil, nil)    when no frontmatter is present (legitimate "missing" case),
//     including a block that is opened but never closed.
//   - (nil, err)    when the scanner fails (e.g. a single line longer than
//     the 1 MiB buffer). Callers should report this as a
//     distinct, attributable finding rather than confusing it
//     with a missing block.
func extractFrontmatterBlock(content []byte) (*frontmatterBlock, error) {
	// Drop a leading UTF-8 BOM; otherwise the first line reads "\ufeff---"
	// and never equals the delimiter.
	content = bytes.TrimPrefix(content, []byte("\xef\xbb\xbf"))

	scanner := bufio.NewScanner(bytes.NewReader(content))
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	if !scanner.Scan() {
		if err := scanner.Err(); err != nil {
			return nil, err
		}
		// Empty input: nothing to scan, so no frontmatter.
		return nil, nil
	}
	if scanner.Text() != "---" {
		return nil, nil
	}

	// The opening delimiter is, by construction, the first line of the file.
	const startLine = 1
	var body bytes.Buffer
	for scanner.Scan() {
		line := scanner.Bytes()
		if string(line) == "---" {
			return &frontmatterBlock{body: body.Bytes(), startLine: startLine}, nil
		}
		body.Write(line)
		body.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	// EOF without seeing the closing `---` — treat as missing block, not error.
	return nil, nil
}

// isEmptyTitle returns true if the captured title value resolves to an
// empty string after quote and whitespace stripping. Accepts unquoted,
// double-quoted, and single-quoted forms.
+func isEmptyTitle(raw string) bool { + v := strings.TrimSpace(raw) + v = strings.TrimSuffix(strings.TrimPrefix(v, `"`), `"`) + v = strings.TrimSuffix(strings.TrimPrefix(v, `'`), `'`) + return strings.TrimSpace(v) == "" +} diff --git a/tools/mdx-validate/internal/validate/frontmatter_test.go b/tools/mdx-validate/internal/validate/frontmatter_test.go new file mode 100644 index 000000000..fcbcf0a71 --- /dev/null +++ b/tools/mdx-validate/internal/validate/frontmatter_test.go @@ -0,0 +1,127 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "testing" + +func TestFrontmatterValidator(t *testing.T) { + cases := []struct { + name string + content string + wantCode string // "" means expect no findings + }{ + { + name: "valid frontmatter with unquoted title", + content: `--- +title: Quickstart +description: Get started fast +--- + +# Quickstart + +body +`, + }, + { + name: "valid frontmatter with double-quoted title", + content: `--- +title: "Canton Quickstart" +--- + +body +`, + }, + { + name: "valid frontmatter with single-quoted title", + content: `--- +title: 'Canton Quickstart' +--- + +body +`, + }, + { + name: "no frontmatter at all", + content: `# Some Page + +body without frontmatter +`, + wantCode: "frontmatter-missing", + }, + { + name: "unclosed frontmatter block", + content: `--- +title: Foo +description: bar +`, + wantCode: "frontmatter-missing", + }, + { + name: "frontmatter without title", + content: `--- +description: a page with no title +sidebarTitle: foo +--- + +body +`, + wantCode: "frontmatter-missing-title", + }, + { + name: "empty title (unquoted)", + content: `--- +title: +description: x +--- + +body +`, + wantCode: "frontmatter-empty-title", + }, + { + name: "empty title (quoted)", + content: `--- +title: "" +--- + +body +`, + wantCode: "frontmatter-empty-title", + }, + { + name: "empty title (single-quoted whitespace)", + content: `--- +title: ' ' 
+--- + +body +`, + wantCode: "frontmatter-empty-title", + }, + } + + v := FrontmatterValidator{} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := v.Validate("test.mdx", []byte(tc.content), nil) + switch { + case tc.wantCode == "": + if len(got) != 0 { + t.Errorf("expected no findings, got %v", got) + } + case len(got) == 0: + t.Errorf("expected finding %q, got none", tc.wantCode) + case got[0].Code != tc.wantCode: + t.Errorf("expected finding code %q, got %q (msg: %q)", + tc.wantCode, got[0].Code, got[0].Message) + } + }) + } +} + +func TestFrontmatterValidatorName(t *testing.T) { + if got := (FrontmatterValidator{}).Name(); got != "frontmatter" { + t.Errorf("Name() = %q, want %q", got, "frontmatter") + } +} diff --git a/tools/mdx-validate/internal/validate/images.go b/tools/mdx-validate/internal/validate/images.go new file mode 100644 index 000000000..d3942f6d8 --- /dev/null +++ b/tools/mdx-validate/internal/validate/images.go @@ -0,0 +1,186 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" +) + +// ImageValidator checks that local image references resolve to a real file on +// disk. It looks at Markdown image syntax (`![alt](path)`) and JSX/HTML +// `src="…"` attributes that point at an image. For each local image reference +// it resolves the target and reports `image-not-found` (Error) when the file is +// absent. +// +// Resolution rules: +// - A path beginning with `/` is rooted at the docs root: the nearest +// ancestor directory of the file that contains `docs.json` (the Mintlify +// site root). If no docs root is found, absolute references are skipped +// rather than guessed at. +// - Any other path is resolved relative to the directory of the .mdx file +// (so `../images/x.png` walks up from the page as written). 
+// +// Existence is checked by reading the target's directory and matching the exact +// file name. That makes the check case-sensitive on every OS, so a reference to +// `Foo.png` whose file is actually `foo.png` is reported even on a +// case-insensitive macOS filesystem (where it would silently break the Linux +// build). +// +// Deliberately out of scope (keeps the check false-positive-free and fast): +// - External references (http/https, protocol-relative `//`, `data:`, +// `mailto:`) — those are an external-link concern, not file existence. +// - Expression sources (`src={…}`) — not a static path. +// - Non-image extensions — only known image extensions are checked, so a +// ``, false}, + {"expression src skipped", ``, false}, + {"non-image src skipped", ``, false}, + {"query/fragment stripped, present", "![a](/images/present.png?v=2#x)", false}, + {"case mismatch is reported", "![a](/images/Present.png)", true}, + {"title after url, present", `![a](/images/present.png "caption")`, false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := codes(tc.content) + hasErr := false + for _, c := range got { + if c == "image-not-found" { + hasErr = true + } + } + if hasErr != tc.wantErr { + t.Errorf("content %q: got codes %v, wantErr=%v", tc.content, got, tc.wantErr) + } + }) + } +} diff --git a/tools/mdx-validate/internal/validate/parsed.go b/tools/mdx-validate/internal/validate/parsed.go new file mode 100644 index 000000000..3c7ce3804 --- /dev/null +++ b/tools/mdx-validate/internal/validate/parsed.go @@ -0,0 +1,24 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "daml.com/x/dpm-components/mdx-validate/internal/mdxscan" + +// parsed holds the per-file derived data that more than one validator needs: +// the masked element scan and the set of locally shadowed component names. 
+// Both are pure functions of the file content, so the runner computes them +// once per file and shares the result, instead of each validator tokenizing +// the same bytes independently. +type parsed struct { + elements []mdxscan.Element + localNames map[string]bool +} + +// newParsed derives the shared per-file data from content. +func newParsed(content []byte) *parsed { + return &parsed{ + elements: mdxscan.Elements(content), + localNames: localComponentNames(content), + } +} diff --git a/tools/mdx-validate/internal/validate/runner.go b/tools/mdx-validate/internal/validate/runner.go new file mode 100644 index 000000000..2afcdfa81 --- /dev/null +++ b/tools/mdx-validate/internal/validate/runner.go @@ -0,0 +1,208 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" +) + +// DefaultValidators returns the validators registered for v0.1. +// New validators are added here as they land. +func DefaultValidators() []Validator { + return []Validator{ + FrontmatterValidator{}, + ComponentValidator{}, + StructureValidator{}, + ImageValidator{}, + } +} + +// Runner orchestrates a set of validators across one or more files. +type Runner struct { + Validators []Validator +} + +// Counts holds the per-severity tallies of a run. +type Counts struct { + Errors int + Warnings int + Files int +} + +// HasBlockingErrors reports whether the run produced any Error findings, +// or whether --strict promotes warnings to errors. +func (c Counts) HasBlockingErrors(strict bool) bool { + if c.Errors > 0 { + return true + } + return strict && c.Warnings > 0 +} + +// RunFile reads a single file from disk and runs every validator over it. +// Returns the findings produced by the validators. 
+func (r Runner) RunFile(path string) ([]Finding, error) { + content, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s: %w", path, err) + } + return r.RunBytes(path, content), nil +} + +// RunBytes runs every validator over an in-memory file. Useful for tests. +func (r Runner) RunBytes(path string, content []byte) []Finding { + p := newParsed(content) + var out []Finding + for _, v := range r.Validators { + out = append(out, v.Validate(path, content, p)...) + } + return out +} + +// RunPaths walks the given paths (files or directories), validates each +// .mdx file, and returns aggregated findings plus per-run counts. +// +// Per-file read failures (permission denied, transient FS error) become +// "io-error" findings of severity Error rather than aborting the run, so +// one bad file does not discard findings already collected from siblings. +func (r Runner) RunPaths(paths []string) ([]Finding, Counts, error) { + files, err := expandToMDXFiles(paths) + if err != nil { + return nil, Counts{}, err + } + + var all []Finding + for _, f := range files { + findings, err := r.RunFile(f) + if err != nil { + all = append(all, Finding{ + Path: f, + Line: 0, + Severity: Error, + Code: "io-error", + Message: err.Error(), + }) + continue + } + all = append(all, findings...) + } + + counts := Counts{Files: len(files)} + for _, f := range all { + if f.Severity == Error { + counts.Errors++ + } else if f.Severity == Warning { + counts.Warnings++ + } + } + return all, counts, nil +} + +// FormatFindings writes a stable, grouped representation of findings to w. +// Order: path, then severity (errors before warnings), then line, then code. 
+func FormatFindings(w io.Writer, findings []Finding) { + sorted := make([]Finding, len(findings)) + copy(sorted, findings) + sort.SliceStable(sorted, func(i, j int) bool { + switch { + case sorted[i].Path != sorted[j].Path: + return sorted[i].Path < sorted[j].Path + case sorted[i].Severity != sorted[j].Severity: + // Lower numeric value sorts first; Error > Warning numerically, + // so flip with > so Error rows come first within a path. + return sorted[i].Severity > sorted[j].Severity + case sorted[i].Line != sorted[j].Line: + return sorted[i].Line < sorted[j].Line + default: + return sorted[i].Code < sorted[j].Code + } + }) + for _, f := range sorted { + fmt.Fprintln(w, f.Format()) + } +} + +// expandToMDXFiles resolves the given paths to a sorted, de-duplicated +// list of .mdx files to validate. Directories are walked recursively; +// non-.mdx files and snippet files (see SkipPath) are excluded. +func expandToMDXFiles(paths []string) ([]string, error) { + seen := map[string]struct{}{} + var out []string + for _, p := range paths { + info, err := os.Stat(p) + if err != nil { + return nil, fmt.Errorf("stat %s: %w", p, err) + } + if info.IsDir() { + // os.Stat follows symlinks, so a symlink-to-directory reaches here, + // but filepath.WalkDir lstats its root and would treat that symlink + // as a single non-directory entry, walking nothing. Resolve the root + // symlink so the real directory is walked. Only the explicit symlink + // target is resolved, so non-symlink paths keep their as-given prefix + // in reported findings. 
+ walkRoot := p + if li, lerr := os.Lstat(p); lerr == nil && li.Mode()&os.ModeSymlink != 0 { + if resolved, rerr := filepath.EvalSymlinks(p); rerr == nil { + walkRoot = resolved + } + } + err := filepath.WalkDir(walkRoot, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if !strings.HasSuffix(path, ".mdx") { + return nil + } + if SkipPath(path) { + return nil + } + if _, dup := seen[path]; dup { + return nil + } + seen[path] = struct{}{} + out = append(out, path) + return nil + }) + if err != nil { + return nil, err + } + } else { + if !strings.HasSuffix(p, ".mdx") { + continue + } + if SkipPath(p) { + continue + } + if _, dup := seen[p]; dup { + continue + } + seen[p] = struct{}{} + out = append(out, p) + } + } + sort.Strings(out) + return out, nil +} + +// SkipPath reports whether a file path should be excluded from validation. +// +// v0.1 skips any file under a `snippets/` directory because Mintlify +// snippets are reusable content fragments that don't have frontmatter by +// design (they're meant to be embedded into pages with ``). +// Validating them as standalone pages produces noise without value. +func SkipPath(path string) bool { + for _, seg := range strings.Split(filepath.ToSlash(path), "/") { + if seg == "snippets" { + return true + } + } + return false +} diff --git a/tools/mdx-validate/internal/validate/runner_test.go b/tools/mdx-validate/internal/validate/runner_test.go new file mode 100644 index 000000000..394788339 --- /dev/null +++ b/tools/mdx-validate/internal/validate/runner_test.go @@ -0,0 +1,161 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. 
+// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRunner_RunBytes(t *testing.T) { + r := Runner{Validators: DefaultValidators()} + + good := []byte(`--- +title: Hello +--- + +body +`) + if got := r.RunBytes("a.mdx", good); len(got) != 0 { + t.Errorf("expected no findings on good MDX, got %v", got) + } + + bad := []byte(`# missing frontmatter +`) + got := r.RunBytes("b.mdx", bad) + if len(got) != 1 || got[0].Code != "frontmatter-missing" { + t.Errorf("expected one frontmatter-missing finding, got %v", got) + } +} + +func TestRunner_RunPaths(t *testing.T) { + dir := t.TempDir() + + // Three files: one valid, one missing title, one not .mdx (ignored). + files := map[string]string{ + "good.mdx": "---\ntitle: Good\n---\n\nbody\n", + "bad.mdx": "---\ndescription: no title here\n---\n\nbody\n", + "skip.txt": "not an MDX file", + } + for name, content := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + + r := Runner{Validators: DefaultValidators()} + findings, counts, err := r.RunPaths([]string{dir}) + if err != nil { + t.Fatalf("RunPaths: %v", err) + } + if counts.Files != 2 { + t.Errorf("expected 2 .mdx files walked, got %d", counts.Files) + } + if counts.Errors != 1 { + t.Errorf("expected 1 error, got %d", counts.Errors) + } + if counts.Warnings != 0 { + t.Errorf("expected 0 warnings, got %d", counts.Warnings) + } + if len(findings) != 1 || findings[0].Code != "frontmatter-missing-title" { + t.Errorf("unexpected findings: %v", findings) + } +} + +func TestRunner_HasBlockingErrors(t *testing.T) { + cases := []struct { + name string + counts Counts + strict bool + want bool + }{ + {"clean", Counts{Errors: 0, Warnings: 0}, false, false}, + {"clean strict", Counts{Errors: 0, Warnings: 0}, true, false}, + {"warnings only", Counts{Errors: 0, Warnings: 3}, false, false}, + {"warnings strict", 
Counts{Errors: 0, Warnings: 3}, true, true}, + {"errors", Counts{Errors: 1, Warnings: 0}, false, true}, + {"errors strict", Counts{Errors: 1, Warnings: 5}, true, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := tc.counts.HasBlockingErrors(tc.strict); got != tc.want { + t.Errorf("HasBlockingErrors(strict=%v) = %v, want %v", + tc.strict, got, tc.want) + } + }) + } +} + +func TestSkipPath(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {"docs-main/foo.mdx", false}, + {"docs-main/appdev/quickstart.mdx", false}, + {"docs-main/snippets/header.mdx", true}, + {"docs-main/snippets/external/foo.mdx", true}, + {"docs-main/appdev/snippets/example.mdx", true}, + {"snippets/top-level.mdx", true}, + {"docs-main/snippets-archive/foo.mdx", false}, // segment match, not prefix + } + for _, tc := range cases { + t.Run(tc.path, func(t *testing.T) { + if got := SkipPath(tc.path); got != tc.want { + t.Errorf("SkipPath(%q) = %v, want %v", tc.path, got, tc.want) + } + }) + } +} + +func TestRunPaths_SkipsSnippets(t *testing.T) { + dir := t.TempDir() + if err := os.Mkdir(filepath.Join(dir, "snippets"), 0o755); err != nil { + t.Fatal(err) + } + files := map[string]string{ + "page.mdx": "---\ntitle: P\n---\nbody\n", + "snippets/partial.mdx": "no frontmatter, but a snippet, so skip\n", + } + for name, content := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + r := Runner{Validators: DefaultValidators()} + findings, counts, err := r.RunPaths([]string{dir}) + if err != nil { + t.Fatalf("RunPaths: %v", err) + } + if counts.Files != 1 { + t.Errorf("expected 1 file (snippet skipped), got %d", counts.Files) + } + if len(findings) != 0 { + t.Errorf("expected no findings on a valid page when snippet is skipped, got %v", findings) + } +} + +func TestFormatFindings_StableOrder(t *testing.T) { + findings := []Finding{ + {Path: "z.mdx", Line: 1, Severity: Error, 
Code: "x", Message: "z"}, + {Path: "a.mdx", Line: 2, Severity: Error, Code: "x", Message: "a2"}, + {Path: "a.mdx", Line: 1, Severity: Error, Code: "x", Message: "a1"}, + {Path: "a.mdx", Line: 1, Severity: Error, Code: "y", Message: "a1y"}, + } + var buf bytes.Buffer + FormatFindings(&buf, findings) + got := strings.TrimRight(buf.String(), "\n") + want := strings.Join([]string{ + "a.mdx:1: error x: a1", + "a.mdx:1: error y: a1y", + "a.mdx:2: error x: a2", + "z.mdx:1: error x: z", + }, "\n") + if got != want { + t.Errorf("format order mismatch:\nwant:\n%s\n\ngot:\n%s", want, got) + } +} diff --git a/tools/mdx-validate/internal/validate/shadow.go b/tools/mdx-validate/internal/validate/shadow.go new file mode 100644 index 000000000..b0656285a --- /dev/null +++ b/tools/mdx-validate/internal/validate/shadow.go @@ -0,0 +1,64 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "regexp" + +// Component-name validation (catalog lookup, required props, tag balance) only +// makes sense for the *Mintlify built-in* of that name. A page can define or +// import its own component that shadows a catalog name — e.g. +// `export const Tooltip = ({children, content}) => …` — with a different prop +// contract. Checking such a usage against the catalog produces false positives, +// so both validators skip any name that the page defines or imports locally. +// +// These patterns are intentionally narrow (named imports, default imports, and +// top-level export const/function/let/var) and match capitalized identifiers +// only, since component names are capitalized. Anything fancier (namespace +// imports, re-exports) is out of scope. 
var (
	reExportDecl = regexp.MustCompile(`(?m)^\s*export\s+(?:const|function|let|var)\s+([A-Z][A-Za-z0-9_]*)`)
	reDefaultImp = regexp.MustCompile(`(?m)^\s*import\s+([A-Z][A-Za-z0-9_]*)\s+from\b`)
	reNamedImp   = regexp.MustCompile(`(?ms)^\s*import\s*\{([^}]*)\}\s*from\b`)
	// reIdent is unanchored: it also matches the capitalized tail of a
	// camelCase name ("Data" in "networkData"). It is left as declared but is
	// no longer used for import clauses below.
	reIdent = regexp.MustCompile(`[A-Z][A-Za-z0-9_]*`)

	// reImportAlias captures the local binding of an `X as Y` import clause.
	reImportAlias = regexp.MustCompile(`\bas\s+([A-Za-z_$][A-Za-z0-9_$]*)`)
	// reWholeIdent matches a capitalized identifier only where it starts at a
	// word boundary, so the tail of a camelCase name does not count.
	reWholeIdent = regexp.MustCompile(`\b[A-Z][A-Za-z0-9_]*`)
)

// localComponentNames returns the set of capitalized component names that the
// file defines (export const/function/…) or imports (default or named). Names
// in this set are locally shadowed and must not be checked against the catalog.
//
// Only the name a clause actually binds is recorded: `import { useCard }`
// does not shadow `Card`, and `import { Card as card }` binds the lowercase
// `card`, so it shadows nothing.
func localComponentNames(content []byte) map[string]bool {
	out := map[string]bool{}
	for _, m := range reExportDecl.FindAllSubmatch(content, -1) {
		out[string(m[1])] = true
	}
	for _, m := range reDefaultImp.FindAllSubmatch(content, -1) {
		out[string(m[1])] = true
	}
	// Named imports: `import { A, B as C } from '…'`. Take the local binding,
	// which is the identifier after `as` when present, otherwise the name.
	for _, m := range reNamedImp.FindAllSubmatch(content, -1) {
		for _, clause := range splitCommas(string(m[1])) {
			if aliases := reImportAlias.FindAllStringSubmatch(clause, -1); len(aliases) > 0 {
				// `A as C` → the binding is C; record it only if capitalized.
				local := aliases[len(aliases)-1][1]
				if c := local[0]; c >= 'A' && c <= 'Z' {
					out[local] = true
				}
				continue
			}
			// Plain `A` → bind A. The match must start at a word boundary so
			// `networkData` does not register `Data`.
			ids := reWholeIdent.FindAllString(clause, -1)
			if len(ids) == 0 {
				continue
			}
			out[ids[len(ids)-1]] = true
		}
	}
	return out
}

// splitCommas splits a brace-import body on commas. A tiny helper kept separate
// so the import-clause handling above stays readable. The result always has at
// least one element (the whole input when it contains no comma).
func splitCommas(s string) []string {
	var out []string
	start := 0
	for i := 0; i < len(s); i++ {
		if s[i] == ',' {
			out = append(out, s[start:i])
			start = i + 1
		}
	}
	return append(out, s[start:])
}
+// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "testing" + +func TestLocalComponentNames(t *testing.T) { + cases := []struct { + name string + content string + wantPresent []string + wantAbsent []string + }{ + { + name: "export const", + content: "export const Tooltip = ({children, content}) => {};\n", + wantPresent: []string{"Tooltip"}, + }, + { + name: "export function/let/var keywords", + content: "export function Foo() {}\n" + + "export let Bar = 1\n" + + "export var Baz = 2\n", + wantPresent: []string{"Foo", "Bar", "Baz"}, + }, + { + name: "default import", + content: "import Foo from '/snippets/foo.mdx'\n", + wantPresent: []string{"Foo"}, + }, + { + name: "named import single", + content: "import { Bar } from '/snippets/bar.mdx'\n", + wantPresent: []string{"Bar"}, + }, + { + name: "named import multiple", + content: "import { A, B } from '/snippets/x.mdx'\n", + wantPresent: []string{"A", "B"}, + }, + { + name: "aliased named import binds local name", + content: "import { Original as Alias } from '/snippets/x.mdx'\n", + wantPresent: []string{"Alias"}, + wantAbsent: []string{"Original"}, + }, + { + name: "multiline named import block", + content: "import {\n" + + " One,\n" + + " Two as Three,\n" + + "} from '/snippets/x.mdx'\n", + wantPresent: []string{"One", "Three"}, + wantAbsent: []string{"Two"}, + }, + { + name: "lowercase identifiers ignored", + content: "export const helper = 1\n" + + "import { networkData } from '/snippets/data.mdx'\n", + wantAbsent: []string{"helper", "networkData"}, + }, + { + name: "component only used, not declared or imported", + content: "---\ntitle: T\n---\n\nhello\n", + wantAbsent: []string{"Note"}, + }, + { + name: "real-world dashboard shape", + content: "import { networkData } from '/snippets/generated/data.mdx';\n" + + "export const Tooltip = ({ children, content }) => {\n" + + " return
{content}
;\n" + + "};\n", + wantPresent: []string{"Tooltip"}, + wantAbsent: []string{"networkData"}, + }, + // --- Pinned limitations: assert current behavior so a future change + // to shadow.go's regexes is a deliberate decision, not an accident. --- + { + name: "type-only import is not matched (limitation)", + content: "import type { Foo } from '/snippets/foo.mdx'\n", + wantAbsent: []string{"Foo"}, + }, + { + name: "mixed default+named import: neither default nor named binding is matched (limitation)", + content: "import React, { Card } from 'react'\n", + wantAbsent: []string{"React", "Card"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := localComponentNames([]byte(tc.content)) + for _, name := range tc.wantPresent { + if !got[name] { + t.Errorf("expected %q to be detected as local; got set %v", name, keys(got)) + } + } + for _, name := range tc.wantAbsent { + if got[name] { + t.Errorf("expected %q NOT to be detected as local; got set %v", name, keys(got)) + } + } + }) + } +} + +// keys returns the map keys for readable failure messages. +func keys(m map[string]bool) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} diff --git a/tools/mdx-validate/internal/validate/structure.go b/tools/mdx-validate/internal/validate/structure.go new file mode 100644 index 000000000..537be3a16 --- /dev/null +++ b/tools/mdx-validate/internal/validate/structure.go @@ -0,0 +1,104 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package validate + +import ( + "fmt" + + "daml.com/x/dpm-components/mdx-validate/internal/catalog" + "daml.com/x/dpm-components/mdx-validate/internal/mdxscan" +) + +// StructureValidator checks JSX tag balance and nesting. 
It maintains a stack +// over the elements returned by mdxscan.Elements (capitalized component tags +// found outside of fenced code blocks and inline code spans), but tracks ONLY +// known catalog components that are not locally shadowed. Non-catalog tokens — +// placeholders like , generics like , and custom/imported +// components — are ignored entirely, because they are not balanced JSX and +// would otherwise poison the stack and cascade into false positives. The rules: +// +// - Open → push the element. +// - SelfClose → no-op (self-balanced). +// - Close → must match the top of the stack. A mismatch or an empty +// stack is an Error "jsx-unexpected-close". +// - End of file with a non-empty stack → one Error "jsx-unclosed-tag" per +// still-open element, attributed to its opening line. +// +// Out of scope (deliberately omitted because they cannot be made +// false-positive-free against real prose and JS imports): {expression} brace +// balancing, stray '<' / '>' detection, and lowercase/HTML tag balancing. +// Only the capitalized-component tag stack is tracked here. +type StructureValidator struct{} + +// Name implements Validator. +func (StructureValidator) Name() string { return "structure" } + +// Validate implements Validator. +func (v StructureValidator) Validate(path string, content []byte, p *parsed) []Finding { + var findings []Finding + var stack []mdxscan.Element + local := p.localNames + + for _, el := range p.elements { + // Only balance known catalog components that are not locally shadowed. + // Everything else (placeholders, generics, custom/imported components) + // is not balanced JSX we can reason about, so it is skipped entirely. + if _, ok := catalog.Lookup(el.Name); !ok || local[el.Name] { + continue + } + switch el.Kind { + case mdxscan.Open: + stack = append(stack, el) + case mdxscan.SelfClose: + // Self-closing tags are balanced on their own; ignore. + case mdxscan.Close: + if len(stack) == 0 { + // Stray close: nothing is open. 
+ findings = append(findings, Finding{ + Path: path, + Line: el.Line, + Severity: Error, + Code: "jsx-unexpected-close", + Message: fmt.Sprintf( + "unexpected closing tag : no open tag to close", el.Name), + }) + continue + } + top := stack[len(stack)-1] + if top.Name == el.Name { + // Matching close: pop it. + stack = stack[:len(stack)-1] + continue + } + // Mismatched close. We report the error but do NOT pop the stack: + // the close is treated as stray and the open tag at the top is left + // in place so it can still be matched by its own correct closing tag + // later (and otherwise be reported as jsx-unclosed-tag at EOF). This + // keeps behavior predictable for the common "wrong tag name typed" + // case without cascading into spurious extra findings. + findings = append(findings, Finding{ + Path: path, + Line: el.Line, + Severity: Error, + Code: "jsx-unexpected-close", + Message: fmt.Sprintf( + "unexpected closing tag : currently open tag is <%s> (opened at line %d)", + el.Name, top.Name, top.Line), + }) + } + } + + // Anything left on the stack was never closed. + for _, open := range stack { + findings = append(findings, Finding{ + Path: path, + Line: open.Line, + Severity: Error, + Code: "jsx-unclosed-tag", + Message: fmt.Sprintf("unclosed tag <%s>: no matching closing tag", open.Name), + }) + } + + return findings +} diff --git a/tools/mdx-validate/internal/validate/structure_test.go b/tools/mdx-validate/internal/validate/structure_test.go new file mode 100644 index 000000000..7cf48d5e8 --- /dev/null +++ b/tools/mdx-validate/internal/validate/structure_test.go @@ -0,0 +1,81 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. 
+// SPDX-License-Identifier: Apache-2.0 + +package validate + +import "testing" + +func TestStructureValidator(t *testing.T) { + cases := []struct { + name string + content string + wantCodes []string // nil/empty means expect no findings; order matters + }{ + { + name: "balanced nested doc (pass)", + content: ` +body +body +`, + }, + { + name: "missing close (fail)", + content: `body never closed`, + wantCodes: []string{"jsx-unclosed-tag"}, + }, + { + name: "mismatched close (fail)", + content: ` +text +`, + // The mismatched is reported as unexpected; the open + // is left on the stack and reported as unclosed at EOF. + wantCodes: []string{"jsx-unexpected-close", "jsx-unclosed-tag"}, + }, + { + name: "stray close with empty stack (fail)", + content: ``, + wantCodes: []string{"jsx-unexpected-close"}, + }, + { + name: "self-closing tag does not affect balance (pass)", + content: ` + +body +`, + }, + { + name: "unclosed tag inside code fence is ignored (pass)", + content: "Here is an example:\n" + + "```mdx\n" + + "\n" + + "never closed in the fence\n" + + "```\n" + + "And normal prose continues.", + }, + } + + v := StructureValidator{} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + content := []byte(tc.content) + got := v.Validate("test.mdx", content, newParsed(content)) + if len(got) != len(tc.wantCodes) { + t.Fatalf("expected %d findings %v, got %d: %v", + len(tc.wantCodes), tc.wantCodes, len(got), got) + } + for i, want := range tc.wantCodes { + if got[i].Code != want { + t.Errorf("finding[%d]: expected code %q, got %q (msg: %q)", + i, want, got[i].Code, got[i].Message) + } + } + }) + } +} + +func TestStructureValidatorName(t *testing.T) { + if got := (StructureValidator{}).Name(); got != "structure" { + t.Errorf("Name() = %q, want %q", got, "structure") + } +} diff --git a/tools/mdx-validate/internal/validate/types.go b/tools/mdx-validate/internal/validate/types.go new file mode 100644 index 000000000..5f60883d4 --- /dev/null +++ 
b/tools/mdx-validate/internal/validate/types.go @@ -0,0 +1,68 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +// Package validate implements the validators that mdx-validate runs over +// Mintlify MDX files. Each validator is a small, independently testable +// unit; the runner composes them and aggregates findings. +package validate + +import "fmt" + +// Severity classifies a finding as a blocking error or a non-blocking warning. +// +// The zero value is SeverityUnspecified so a Finding constructed without an +// explicit severity does not silently default to Error or Warning. +type Severity int + +const ( + // SeverityUnspecified is the zero value; never produced by a real validator. + SeverityUnspecified Severity = iota + // Warning is reported but does not affect exit status unless --strict is set. + Warning + // Error makes the validator exit non-zero. + Error +) + +// String returns the lowercase human label for a severity. +func (s Severity) String() string { + switch s { + case Error: + return "error" + case Warning: + return "warning" + default: + return "unspecified" + } +} + +// Finding is a single problem reported by a validator. +type Finding struct { + Path string // file path, relative to wherever the runner was invoked + Line int // 1-based line number; 0 if not applicable + Severity Severity + Code string // short stable identifier, e.g. "frontmatter-missing-title" + Message string // human-readable explanation +} + +// Format returns a single-line representation of the finding. +// +// docs-main/foo.mdx:0: error frontmatter-missing-title: ... +func (f Finding) Format() string { + return fmt.Sprintf("%s:%d: %s %s: %s", + f.Path, f.Line, f.Severity, f.Code, f.Message) +} + +// Validator inspects a single MDX file's bytes and reports findings. 
+// The runner currently invokes validators sequentially per file; the +// interface itself is goroutine-friendly (validators should not retain +// state across calls), but no claim of cross-file parallelism is made +// today. +type Validator interface { + // Name is a short identifier for diagnostics and tests. + Name() string + // Validate returns zero or more findings for the given file. p carries the + // shared per-file parse (element scan and local-name set) so validators + // that need it do not re-tokenize the same content; validators that don't + // need it ignore p. + Validate(path string, content []byte, p *parsed) []Finding +} diff --git a/tools/mdx-validate/smoke-test.sh b/tools/mdx-validate/smoke-test.sh new file mode 100755 index 000000000..cdb3877a5 --- /dev/null +++ b/tools/mdx-validate/smoke-test.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Minimal smoke test for mdx-validate. Run from this directory after `make build`. +# Verifies the binary handles the cases that matter for v0.1: +# 1. valid frontmatter passes (exit 0) +# 2. missing title fails (exit 1) +# 3. --version prints +# 4. snippets/ paths are skipped during dir walk + +set -euo pipefail + +# Run from this script's directory so BIN resolves regardless of CWD. +cd "$(dirname "$0")" +BIN="$(pwd)/mdx-validate" + +# Each case uses its own scratch directory to avoid cross-contamination. 
+mktemp_subdir() { mktemp -d; } + +# --- Case 1: valid file passes --- +TMP1=$(mktemp_subdir) +trap 'rm -rf "$TMP1"' EXIT +cat > "$TMP1/good.mdx" <<'EOF' +--- +title: Smoke Page +--- + +body +EOF +"$BIN" "$TMP1/good.mdx" >/dev/null +echo " ok: valid file passes" +rm -rf "$TMP1" +trap - EXIT + +# --- Case 2: missing title fails --- +TMP2=$(mktemp_subdir) +trap 'rm -rf "$TMP2"' EXIT +cat > "$TMP2/bad.mdx" <<'EOF' +--- +description: no title +--- + +body +EOF +if "$BIN" "$TMP2/bad.mdx" >/dev/null 2>&1; then + echo "FAIL: missing-title file should have failed" + exit 1 +fi +echo " ok: missing title fails as expected" +rm -rf "$TMP2" +trap - EXIT + +# --- Case 3: --version prints --- +"$BIN" --version | grep -q "mdx-validate" +echo " ok: --version prints" + +# --- Case 4: snippet skip during dir walk --- +TMP4=$(mktemp_subdir) +trap 'rm -rf "$TMP4"' EXIT +mkdir -p "$TMP4/snippets" +echo "no frontmatter, but it's a snippet" > "$TMP4/snippets/partial.mdx" +cat > "$TMP4/page.mdx" <<'EOF' +--- +title: Page +--- + +body +EOF +"$BIN" "$TMP4" >/dev/null +echo " ok: snippets directory is skipped during dir walk" +rm -rf "$TMP4" +trap - EXIT + +echo "smoke OK" From d8d8e203188991b4736a7bd0301acb1d729d0693 Mon Sep 17 00:00:00 2001 From: miller-da Date: Mon, 22 Jun 2026 20:36:26 -0500 Subject: [PATCH 2/2] clean up Signed-off-by: miller-da --- tools/mdx-validate/cmd/mdx-validate/main_test.go | 2 +- .../mdx-validate/internal/validate/frontmatter.go | 14 +++++--------- tools/mdx-validate/internal/validate/runner.go | 3 +-- .../mdx-validate/internal/validate/shadow_test.go | 2 -- tools/mdx-validate/smoke-test.sh | 2 +- 5 files changed, 8 insertions(+), 15 deletions(-) diff --git a/tools/mdx-validate/cmd/mdx-validate/main_test.go b/tools/mdx-validate/cmd/mdx-validate/main_test.go index 90524ee82..450e89c3e 100644 --- a/tools/mdx-validate/cmd/mdx-validate/main_test.go +++ b/tools/mdx-validate/cmd/mdx-validate/main_test.go @@ -54,7 +54,7 @@ func TestRun_NonMDXFileIsDropped(t *testing.T) 
{ } func TestRun_StrictPromotesWarnings(t *testing.T) { - // v0.1 has no warning-emitting validators, so this case is exercised + // No warning-emitting validators, so this case is exercised // by Counts.HasBlockingErrors directly in runner_test.go. When a // warning-emitting validator lands, expand this to invoke run() with // a fixture that produces only warnings, both with and without diff --git a/tools/mdx-validate/internal/validate/frontmatter.go b/tools/mdx-validate/internal/validate/frontmatter.go index e82fdafa1..0faf0d2f5 100644 --- a/tools/mdx-validate/internal/validate/frontmatter.go +++ b/tools/mdx-validate/internal/validate/frontmatter.go @@ -12,13 +12,10 @@ import ( ) // FrontmatterValidator checks the YAML frontmatter block at the top of an -// MDX file. v0.1 enforces: +// MDX file. Enforces: // // - The file must open with `---` and have a closing `---`. // - The block must contain a non-empty `title:` value. -// -// Other Mintlify-known keys (description, sidebarTitle, icon, …) are not -// required for v0.1 and may be added when a real docs-main case demands it. type FrontmatterValidator struct{} // Name implements Validator. @@ -34,8 +31,7 @@ var reTitleLine = regexp.MustCompile(`(?m)^title:[ \t]*(.*?)[ \t]*$`) // Validate implements Validator. // -// Known regex-parsing limitations (acceptable for v0.1; widen scope only -// once a real false positive is observed in docs-main): +// Known regex-parsing limitations: // - Block scalars whose body contains a `title:` line (e.g. `description: |` // followed by an indented `title:`) can match the inner string. // - Duplicate `title:` keys (which YAML rejects as invalid) are accepted; @@ -103,9 +99,9 @@ type frontmatterBlock struct { // - (*block, nil) when a complete frontmatter block was found. // - (nil, nil) when no frontmatter is present (legitimate "missing" case). // - (nil, err) when the scanner fails (e.g. a single line longer than -// the 1 MiB buffer). 
Callers should report this as a -// distinct, attributable finding rather than confusing it -// with a missing block. +// the 1 MiB buffer). Callers should report this as a +// distinct, attributable finding rather than confusing it +// with a missing block. func extractFrontmatterBlock(content []byte) (*frontmatterBlock, error) { scanner := bufio.NewScanner(bytes.NewReader(content)) scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) diff --git a/tools/mdx-validate/internal/validate/runner.go b/tools/mdx-validate/internal/validate/runner.go index 2afcdfa81..f71800b2d 100644 --- a/tools/mdx-validate/internal/validate/runner.go +++ b/tools/mdx-validate/internal/validate/runner.go @@ -12,7 +12,6 @@ import ( "strings" ) -// DefaultValidators returns the validators registered for v0.1. // New validators are added here as they land. func DefaultValidators() []Validator { return []Validator{ @@ -194,7 +193,7 @@ func expandToMDXFiles(paths []string) ([]string, error) { // SkipPath reports whether a file path should be excluded from validation. // -// v0.1 skips any file under a `snippets/` directory because Mintlify +// Skips any file under a `snippets/` directory because Mintlify // snippets are reusable content fragments that don't have frontmatter by // design (they're meant to be embedded into pages with ``). // Validating them as standalone pages produces noise without value. diff --git a/tools/mdx-validate/internal/validate/shadow_test.go b/tools/mdx-validate/internal/validate/shadow_test.go index 8afd595f1..031178e69 100644 --- a/tools/mdx-validate/internal/validate/shadow_test.go +++ b/tools/mdx-validate/internal/validate/shadow_test.go @@ -74,8 +74,6 @@ func TestLocalComponentNames(t *testing.T) { wantPresent: []string{"Tooltip"}, wantAbsent: []string{"networkData"}, }, - // --- Pinned limitations: assert current behavior so a future change - // to shadow.go's regexes is a deliberate decision, not an accident. 
--- { name: "type-only import is not matched (limitation)", content: "import type { Foo } from '/snippets/foo.mdx'\n", diff --git a/tools/mdx-validate/smoke-test.sh b/tools/mdx-validate/smoke-test.sh index cdb3877a5..af3d9b68c 100755 --- a/tools/mdx-validate/smoke-test.sh +++ b/tools/mdx-validate/smoke-test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # Minimal smoke test for mdx-validate. Run from this directory after `make build`. -# Verifies the binary handles the cases that matter for v0.1: +# Verifies the binary handles: # 1. valid frontmatter passes (exit 0) # 2. missing title fails (exit 1) # 3. --version prints