From 2da4a6544a5b3193081743e7d84f3fa719fa7674 Mon Sep 17 00:00:00 2001 From: Sonu Preetam Date: Mon, 23 Mar 2026 09:57:53 -0400 Subject: [PATCH 1/5] feat(sync): add diagram block rewrite transform for Kroki rendering Signed-off-by: Sonu Preetam --- cmd/sync-content/config.go | 1 + cmd/sync-content/sync.go | 5 ++ cmd/sync-content/transform.go | 18 +++++ cmd/sync-content/transform_test.go | 107 +++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+) diff --git a/cmd/sync-content/config.go b/cmd/sync-content/config.go index c0c6346..dc6a1b2 100644 --- a/cmd/sync-content/config.go +++ b/cmd/sync-content/config.go @@ -66,6 +66,7 @@ type Transform struct { InjectFrontmatter map[string]any `yaml:"inject_frontmatter"` RewriteLinks bool `yaml:"rewrite_links"` StripBadges bool `yaml:"strip_badges"` + RewriteDiagrams bool `yaml:"rewrite_diagrams"` } // loadConfig reads a sync-config.yaml file and returns the parsed configuration. diff --git a/cmd/sync-content/sync.go b/cmd/sync-content/sync.go index 4d38e36..fa033ed 100644 --- a/cmd/sync-content/sync.go +++ b/cmd/sync-content/sync.go @@ -160,6 +160,9 @@ func syncConfigSource(ctx context.Context, gh *apiClient, src Source, defaults D if file.Transform.RewriteLinks { content = rewriteRelativeLinks(content, owner, repoName, src.Branch) } + if file.Transform.RewriteDiagrams { + content = rewriteDiagramBlocks(content) + } out := []byte(content) if len(file.Transform.InjectFrontmatter) > 0 { @@ -323,6 +326,7 @@ func processRepo(ctx context.Context, gh *apiClient, org, output string, repo Re readme = shiftHeadings(readme) readme = titleCaseHeadings(readme) readme = stripBadges(readme) + readme = rewriteDiagramBlocks(readme) readme = rewriteRelativeLinks(readme, org, repo.Name, repo.DefaultBranch) } else { readme = fmt.Sprintf( @@ -448,6 +452,7 @@ func syncRepoDocPages(ctx context.Context, gh *apiClient, org string, repo Repo, content = stripLeadingH1(content) content = shiftHeadings(content) content = 
titleCaseHeadings(content) + content = rewriteDiagramBlocks(content) fileDir := filepath.Dir(filePath) content = rewriteRelativeLinks(content, org, repo.Name, repo.DefaultBranch, fileDir) diff --git a/cmd/sync-content/transform.go b/cmd/sync-content/transform.go index 833cdee..e4a5136 100644 --- a/cmd/sync-content/transform.go +++ b/cmd/sync-content/transform.go @@ -144,6 +144,24 @@ func injectFrontmatter(content []byte, fm map[string]any) ([]byte, error) { return buf.Bytes(), nil } +// diagramBlockRe matches fenced code blocks whose info-string is a recognised +// diagram language. The opening fence must be at the start of a line with +// exactly three backticks followed by the language name and optional whitespace. +var diagramBlockRe = regexp.MustCompile(`(?m)^` + "```" + `(mermaid|plantuml|d2|graphviz|dot|ditaa|blockdiag|seqdiag|actdiag|nwdiag|packetdiag|rackdiag|c4plantuml|erd|nomnoml|svgbob|wavedrom|vega|vegalite)\s*$`) + +// rewriteDiagramBlocks converts standard diagram code blocks (```mermaid, etc.) +// to the Kroki format (```kroki {type=mermaid}) that Hugo's Kroki render hook +// expects. The "dot" alias is normalised to "graphviz" for Kroki compatibility. +func rewriteDiagramBlocks(content string) string { + return diagramBlockRe.ReplaceAllStringFunc(content, func(match string) string { + lang := strings.TrimSpace(strings.TrimPrefix(match, "```")) + if lang == "dot" { + lang = "graphviz" + } + return "```kroki {type=" + lang + "}" + }) +} + // insertAfterFrontmatter inserts extra bytes right after the closing "---" // of YAML frontmatter. If there is no frontmatter, content is prepended. 
func insertAfterFrontmatter(content, insert []byte) []byte { diff --git a/cmd/sync-content/transform_test.go b/cmd/sync-content/transform_test.go index 4b7dda3..aba90e4 100644 --- a/cmd/sync-content/transform_test.go +++ b/cmd/sync-content/transform_test.go @@ -330,6 +330,113 @@ func TestRewriteRelativeLinks(t *testing.T) { }) } +func TestRewriteDiagramBlocks(t *testing.T) { + t.Run("mermaid block rewritten", func(t *testing.T) { + input := "# Doc\n\n```mermaid\ngraph TD\n A-->B\n```\n\nMore text." + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=mermaid}") { + t.Errorf("mermaid block should be rewritten to kroki format, got %q", result) + } + if strings.Contains(result, "```mermaid") { + t.Error("original ```mermaid fence should not remain") + } + if !strings.Contains(result, "graph TD") { + t.Error("diagram body should be preserved") + } + }) + + t.Run("plantuml block rewritten", func(t *testing.T) { + input := "```plantuml\n@startuml\nAlice -> Bob\n@enduml\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=plantuml}") { + t.Errorf("plantuml block should be rewritten, got %q", result) + } + }) + + t.Run("d2 block rewritten", func(t *testing.T) { + input := "```d2\nx -> y\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=d2}") { + t.Errorf("d2 block should be rewritten, got %q", result) + } + }) + + t.Run("dot alias normalised to graphviz", func(t *testing.T) { + input := "```dot\ndigraph { a -> b }\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=graphviz}") { + t.Errorf("dot should be normalised to graphviz, got %q", result) + } + }) + + t.Run("graphviz block rewritten", func(t *testing.T) { + input := "```graphviz\ndigraph { a -> b }\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=graphviz}") { + t.Errorf("graphviz block should be rewritten, got %q", 
result) + } + }) + + t.Run("multiple diagram blocks rewritten", func(t *testing.T) { + input := "# Diagrams\n\n```mermaid\ngraph TD\n```\n\nText.\n\n```plantuml\n@startuml\n@enduml\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=mermaid}") { + t.Error("first diagram block should be rewritten") + } + if !strings.Contains(result, "```kroki {type=plantuml}") { + t.Error("second diagram block should be rewritten") + } + }) + + t.Run("non-diagram code blocks unchanged", func(t *testing.T) { + input := "```go\nfunc main() {}\n```\n\n```python\nprint('hi')\n```" + result := rewriteDiagramBlocks(input) + if result != input { + t.Errorf("non-diagram code blocks should be unchanged\ngot: %q\nwant: %q", result, input) + } + }) + + t.Run("already kroki block unchanged", func(t *testing.T) { + input := "```kroki {type=mermaid}\ngraph TD\n```" + result := rewriteDiagramBlocks(input) + if result != input { + t.Errorf("already-kroki block should be unchanged\ngot: %q\nwant: %q", result, input) + } + }) + + t.Run("no code blocks unchanged", func(t *testing.T) { + input := "# Title\n\nPlain text with no code blocks." 
+ result := rewriteDiagramBlocks(input) + if result != input { + t.Errorf("content without code blocks should be unchanged\ngot: %q\nwant: %q", result, input) + } + }) + + t.Run("closing fence not touched", func(t *testing.T) { + input := "```mermaid\ngraph TD\n A-->B\n```\n" + result := rewriteDiagramBlocks(input) + if !strings.HasSuffix(strings.TrimRight(result, "\n"), "```") { + t.Errorf("closing fence should remain unchanged, got %q", result) + } + }) + + t.Run("trailing whitespace on fence handled", func(t *testing.T) { + input := "```mermaid \ngraph TD\n```" + result := rewriteDiagramBlocks(input) + if !strings.Contains(result, "```kroki {type=mermaid}") { + t.Errorf("trailing whitespace should be handled, got %q", result) + } + }) + + t.Run("inline mermaid reference not rewritten", func(t *testing.T) { + input := "Use ```mermaid blocks for diagrams." + result := rewriteDiagramBlocks(input) + if result != input { + t.Errorf("inline reference should not be rewritten\ngot: %q\nwant: %q", result, input) + } + }) +} + func TestInsertAfterFrontmatter(t *testing.T) { t.Run("with frontmatter", func(t *testing.T) { content := []byte("---\ntitle: Test\n---\n\nBody text") From 18d4de8d9b2d9d74b0e516a912e4b16a605db593 Mon Sep 17 00:00:00 2001 From: Sonu Preetam Date: Tue, 24 Mar 2026 08:11:37 -0400 Subject: [PATCH 2/5] chore: update the spec files Signed-off-by: Sonu Preetam --- specs/006-go-sync-tool/plan.md | 135 ++++++++++++++++ specs/006-go-sync-tool/research.md | 19 +++ specs/006-go-sync-tool/spec.md | 11 +- specs/006-go-sync-tool/tasks.md | 249 +++++++++++++++++++++++++++++ 4 files changed, 409 insertions(+), 5 deletions(-) create mode 100644 specs/006-go-sync-tool/plan.md create mode 100644 specs/006-go-sync-tool/tasks.md diff --git a/specs/006-go-sync-tool/plan.md b/specs/006-go-sync-tool/plan.md new file mode 100644 index 0000000..46c3feb --- /dev/null +++ b/specs/006-go-sync-tool/plan.md @@ -0,0 +1,135 @@ +# Implementation Plan: Go Content Sync Tool + 
+**Branch**: `006-go-sync-tool` | **Date**: 2026-03-04 | **Spec**: [specs/006-go-sync-tool/spec.md](/specs/006-go-sync-tool/spec.md) +**Input**: Feature specification from `/specs/006-go-sync-tool/spec.md` (consolidated) + +## Summary + +Replace the config-only `cmd/sync-content` tool with the production-quality governance-driven hybrid sync tool ported from the test-website repository. The tool derives the set of eligible repositories from the org's governance registry (`peribolos.yaml` in the `.github` repo), fetches per-repo metadata via the GitHub REST API, applies Markdown transforms (heading level shifting, acronym-aware Title Case with ALL CAPS normalisation, duplicate H1 removal, badge stripping, relative link rewriting, diagram code block rewriting to Kroki format), and generates Hugo-compatible pages and landing page card data. A declarative config overlay layers file-level syncs on top. The surrounding infrastructure (gitignore, directory scaffolding, CI workflows, Hugo layouts including a render heading hook) must be adapted to consume the new tool's output. 
+ +## Technical Context + +**Language/Version**: Go 1.25 (sync tool), Hugo 0.155.1 extended (site generator), Node.js 22 (Doks theme build) +**Primary Dependencies**: `gopkg.in/yaml.v3` (only third-party Go dep), `@thulite/doks-core` (Hugo theme), Hugo Modules +**Storage**: Filesystem — generated Markdown files and JSON; no database +**Testing**: `go test` with `net/http/httptest` for mock API server, `-race` flag for concurrency safety +**Target Platform**: Linux (CI), macOS/Linux (local dev) +**Project Type**: CLI tool (Go) embedded in a static website repo (Hugo) +**Performance Goals**: Full org sync < 60s with token; Hugo build < 2s +**Constraints**: All code in `package main` within `cmd/sync-content/` (Constitution XIV: Simplicity); third-party deps minimized — `gopkg.in/yaml.v3` is the sole dep (Constitution II) +**Scale/Scope**: 10 eligible repos in org, 10 Go source files, 10 test files + +## Constitution Check (Pre-Design) + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +**Pre-Design Gate Result**: PASS — all 17 principles checked; all now satisfied. X (`go vet` + `gofmt` in `ci.yml`) and XV (three-workflow CI/CD model) resolved. See Post-Design Re-Check below for detailed table. + +## Project Structure + +### Documentation (this feature) + +```text +specs/006-go-sync-tool/ +├── spec.md # Feature specification +├── plan.md # This file +├── research.md # Phase 0 output +└── tasks.md # Implementation tasks +``` + +### Source Code (repository root) + +```text +complytime-website/ +├── cmd/ +│ └── sync-content/ +│ ├── main.go # Entry point and orchestration (~440 lines) +│ ├── config.go # Config types and loading (incl. Peribolos types) +│ ├── github.go # GitHub API client and types (incl. 
peribolos fetch) +│ ├── transform.go # Markdown transforms +│ ├── hugo.go # Hugo page and card generation +│ ├── sync.go # Sync logic and repo processing +│ ├── manifest.go # Manifest I/O and state tracking +│ ├── cleanup.go # Orphan and stale content removal +│ ├── path.go # Path validation utilities +│ ├── lock.go # Content lockfile read/write/query +│ └── *_test.go # Tests mirror source files (10 files) +├── config/ +│ └── _default/ +│ ├── hugo.toml # Site config +│ ├── module.toml # Hugo module mounts (existing) +│ ├── params.toml # Doks theme params (existing) +│ └── menus/ +│ └── menus.en.toml # Navigation menus (Projects entry exists at weight 20) +├── content/ +│ ├── docs/ +│ │ ├── projects/ +│ │ │ ├── _index.md # Hand-maintained section index (committed, has cascade for sidebar collapsing) +│ │ │ └── {repo}/ # Generated per-repo content (gitignored) +│ │ │ ├── _index.md # Section index (frontmatter only, no body) +│ │ │ ├── overview.md # README content as child page +│ │ │ └── {doc}.md # Doc pages from discovery.scan_paths +│ │ └── getting-started/ # Hand-maintained (committed) +├── data/ +│ └── projects.json # Generated landing page cards (gitignored) +├── .sync-manifest.json # Tracks written files for orphan cleanup (gitignored) +├── layouts/ +│ ├── home.html # Landing page (reads data/projects.json dynamically) +│ ├── shortcodes/ +│ │ └── project-cards.html # Project cards shortcode (type-grouped, reads data/projects.json) +│ └── docs/ +│ └── list.html # Docs list with sidebar (already exists) +├── .github/ +│ └── workflows/ +│ ├── deploy-gh-pages.yml # Deploy pipeline (sync at locked SHAs, Hugo build, GitHub Pages) +│ ├── ci.yml # PR validation (test, sync with --lock, build) +│ └── sync-content-check.yml # Weekly content check (--update-lock, PR creation) +├── sync-config.yaml # Declarative file sync manifest (updated) +├── .content-lock.json # Approved upstream SHAs per repo (committed) +├── go.mod # Go module (initialized fresh for the port) +├── 
go.sum # Go checksums (generated by go mod tidy) +└── .gitignore # Updated with generated path exclusions +``` + +**Structure Decision**: Single-project layout. The sync tool is organized as multiple files within `package main` at `cmd/sync-content/` (10 source files). No separate packages, no `internal/`, no `pkg/`. Files are split by domain (config, GitHub API, transforms, Hugo pages, sync logic, manifest, cleanup, path utils, content lockfile, entry point). Tests mirror source files 1:1. This matches Constitution XIV (Simplicity) — no unnecessary abstractions while keeping each file focused. + +## Constitution Re-Check (Post Phase 1 Design) + +| Principle | Status | Notes | +|-----------|--------|-------| +| I. Hugo + Doks | PASS | No changes to site framework. | +| II. Go Tooling | PASS | Third-party Go dependencies minimized; `gopkg.in/yaml.v3` is the sole dep (also used for peribolos parsing). | +| III. Single Source of Truth | PASS | Content sourced from GitHub API. Governance registry (`peribolos.yaml`) is authoritative for which repos exist (Constitution v1.5.0). | +| IV. Governance-Driven Discovery with Config Overlay | PASS | Repo list derived from `peribolos.yaml`; per-repo metadata from API; `sync-config.yaml` overlay for precision (Constitution v1.5.0). | +| V. No Runtime JS Frameworks | PASS | Diagram code blocks are rewritten to Kroki format (`render-codeblock-kroki.html`) for server-side rendering rather than using Doks' client-side `render-codeblock-mermaid.html`. No custom JavaScript added. | +| VI. Match ComplyTime Brand | PASS | `layouts/home.html` uses dynamic project cards from `data/projects.json`. Visual styling and brand consistency preserved. | +| VII. Responsive and Accessible | PASS | No layout changes required. | +| VIII. Performance | PASS | Hugo build < 2s, sync < 60s targets achievable. | +| IX. SPDX License Headers | PASS | Present in all `.go` source and test files. | +| X. 
Go Code Quality | PASS | `go vet` + `gofmt` checks run in `ci.yml`; `go test -race` in both CI and deploy. | +| XI. Structured Logging | PASS | All `log/slog` with structured fields. | +| XII. Dry-Run by Default | PASS | `--write` required for disk I/O. Dry-run validated. | +| XIII. Generated Content Not Committed | PASS | `.gitignore` updated (T001). `.content-lock.json` is a committed control file, not derived content. | +| XIV. Simplicity | PASS | All code in `package main`, no unnecessary packages or abstractions. | +| XV. GitHub Actions CI/CD | PASS | Three-workflow model: CI, Content Sync Check, Deploy. | +| XVI. GitHub Pages Hosting | PASS | No hosting changes. | +| XVII. Apache 2.0 | PASS | SPDX headers present. | + +**Post-Design Gate Result**: PASS. All 17 principles satisfied per Constitution v1.5.0. Principle IV updated from API-based org scan to governance-driven discovery via peribolos.yaml (IS-001). + +## Hardening (Post-Audit) + +A code audit of `cmd/sync-content/` identified 18 findings across security, logic, redundancy, performance, and flexibility. These were cross-referenced against the spec, plan, and existing tasks — none were previously tracked. 
+ +**In-scope for feature 006** (10 tasks, T028–T037): +- **Tier 1 — Security & Correctness**: Path traversal via config `dest` (T028), context-cancellation gap in retry sleep (T029), incomplete stale cleanup (T030) +- **Tier 2 — Defensive Coding**: Unbounded error body read (T031), URL escaping (T032), dry-run card building (T033) +- **Tier 3 — Redundancy Removal**: Duplicated card builder (T034), dead branch fallback (T035), hardcoded exclude list (T036) +- **Hardening Tests**: T037 covers path traversal rejection, ctx cancellation, stale cleanup completeness + +**Deferred** (7 findings — design improvements, not bugs at current scale): +- Serial recursive API calls (#11), HTTP connection pooling (#12), hardcoded API URL (#14), public-only repos (#15), no log level control (#17), no config schema version (#18). Finding #13 (redundant README fetch in discovery) is N/A after discovery mode removal in T054. + +## Complexity Tracking + +No constitution violations. All design choices align with established principles. Hardening phase adds security and correctness fixes without introducing new dependencies or abstractions — consistent with Constitution XIV (Simplicity). Phase 13 (content transform improvements) adds heading casing normalisation, ALL CAPS normalisation, duplicate H1 removal, and a Hugo render heading hook — all within existing files, no new packages or dependencies. Phase 14 (diagram block rewriting) adds Kroki format conversion for upstream diagram code blocks, routing mermaid through server-side Kroki rather than client-side JS — consistent with Constitution V (No Runtime JavaScript Frameworks). 
diff --git a/specs/006-go-sync-tool/research.md b/specs/006-go-sync-tool/research.md index 3679d8f..32a2d6b 100644 --- a/specs/006-go-sync-tool/research.md +++ b/specs/006-go-sync-tool/research.md @@ -112,3 +112,22 @@ go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .con **Alternatives considered**: - Committing an empty `data/projects.json` (`[]`) — rejected per Constitution XIII. - Adding a `.gitkeep` in `data/` — unnecessary since Hugo handles the absence gracefully. + +## R8: Diagram Code Block Rewriting (Kroki vs Client-Side Mermaid) + +**Decision**: Rewrite all upstream diagram code blocks (mermaid, plantuml, d2, graphviz/dot, ditaa, and 13 other languages) from their native fenced format (`` ```mermaid ``) to Kroki format (`` ```kroki {type=mermaid} ``) during content sync. The `dot` alias is normalised to `graphviz` for Kroki compatibility. + +**Rationale**: Doks ships two relevant render hooks: +- `render-codeblock-mermaid.html` — renders mermaid diagrams via client-side JavaScript +- `render-codeblock-kroki.html` — renders diagrams server-side via the Kroki API (`krokiURL` in `params.toml`) + +Routing mermaid through Kroki instead of the native mermaid hook avoids adding client-side JavaScript to diagram rendering, upholding Constitution V (No Runtime JavaScript Frameworks). Kroki also supports 17 diagram languages (plantuml, d2, graphviz, ditaa, etc.) that have no Doks render hook at all, so a single rewrite handles all diagram types uniformly. + +The transform is applied unconditionally in `processRepo` and `syncRepoDocPages` (all org-discovered content), and conditionally in `syncConfigSource` (gated by `rewrite_diagrams` config flag). + +**Infrastructure dependency**: Requires `krokiURL = "https://kroki.io"` in `config/_default/params.toml` and the `@thulite/doks-core` module mount (which provides `render-codeblock-kroki.html`). Both are already configured. 
+ +**Alternatives considered**: +- Leaving native `` ```mermaid `` blocks as-is — rejected because Doks' `render-codeblock-mermaid.html` uses client-side JS (violates Constitution V), and non-mermaid diagrams (plantuml, d2, etc.) would not render at all. +- Adding custom Hugo render hooks per diagram language — rejected per Constitution I (no theme forking) and XIV (simplicity). Kroki handles all languages through one hook. +- Client-side Kroki rendering via JavaScript — rejected per Constitution V. diff --git a/specs/006-go-sync-tool/spec.md b/specs/006-go-sync-tool/spec.md index 1a91227..797bf69 100644 --- a/specs/006-go-sync-tool/spec.md +++ b/specs/006-go-sync-tool/spec.md @@ -7,9 +7,9 @@ The ComplyTime website (`complytime.dev`) documents a growing ecosystem of open-source compliance tools hosted across multiple repositories in the `complytime` GitHub organization. Before this feature, project documentation was manually copied into the site — error-prone, inconsistent, and unable to scale as new repos were added. -This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, ~2,100 lines across 10 source files in `package main`) that derives the set of eligible repositories from the org's governance registry (`peribolos.yaml` in the `.github` repo), fetches their README content and per-repo metadata via the GitHub REST API, applies Markdown transforms (heading level shifting, Title Case normalisation with acronym awareness and ALL CAPS normalisation, badge stripping, relative link rewriting), and generates Hugo-compatible pages and landing page card data. A declarative config overlay (`sync-config.yaml`) provides precision control for repos needing custom documentation layouts. 
+This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, 10 source files in `package main`) that derives the set of eligible repositories from the org's governance registry (`peribolos.yaml` in the `.github` repo), fetches their README content and per-repo metadata via the GitHub REST API, applies Markdown transforms (heading level shifting, Title Case normalisation with acronym awareness and ALL CAPS normalisation, badge stripping, relative link rewriting, diagram code block rewriting to Kroki format), and generates Hugo-compatible pages and landing page card data. A declarative config overlay (`sync-config.yaml`) provides precision control for repos needing custom documentation layouts. -**Dependencies**: Go 1.25+, `gopkg.in/yaml.v3` (sole third-party Go dep), Hugo 0.155.1 extended, Node.js 22. +**Dependencies**: Go 1.25+, `gopkg.in/yaml.v3` (sole third-party Go dep), Hugo 0.155.1 extended, Node.js 22. Diagram rendering requires `@thulite/doks-core`'s `render-codeblock-kroki.html` hook and `krokiURL` in `params.toml` (external service: `https://kroki.io`). 
## Scope @@ -23,10 +23,10 @@ This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, ~2, | IS-002 | README fetch with base64 decoding and SHA tracking | | IS-003 | Per-repo page generation: section index (`_index.md`, frontmatter only, with `formatRepoTitle` for `title` and raw repo name as `linkTitle` for sidebar; ALL CAPS repo/file names normalised to Title Case) + overview page (`overview.md`, README content) | | IS-004 | Landing page card generation (`data/projects.json`) with type derivation from topics | -| IS-005 | Config-driven file sync with transforms (`inject_frontmatter`, `rewrite_links`, `strip_badges`); heading shift and Title Case applied unconditionally to all synced content | +| IS-005 | Config-driven file sync with per-file transforms (`inject_frontmatter`, `rewrite_links`, `strip_badges`, `rewrite_diagrams`) applied only when enabled in `sync-config.yaml`; org-discovered content unconditionally receives `stripLeadingH1`, `shiftHeadings`, `titleCaseHeadings`, `stripBadges`, `rewriteDiagramBlocks`, and `rewriteRelativeLinks`; heading shift and Title Case are applied unconditionally to all synced content | | IS-006 | Concurrent processing with bounded worker pool (`--workers`) | | IS-007 | Dry-run by default; `--write` flag required for disk I/O | -| IS-008 | Markdown transforms: `stripLeadingH1` (removes leading H1 — title already in frontmatter), `shiftHeadings` (H1→H2, H2→H3, …), `titleCaseHeadings` (acronym-aware Title Case for in-page headings and TOC; normalises ALL CAPS words to Title Case while preserving known acronyms from the `knownAcronyms` map in `hugo.go` — ~30 domain terms; maintainers add entries as new projects introduce terminology), `stripBadges`, `rewriteRelativeLinks` | +| IS-008 | Markdown transforms: `stripLeadingH1` (removes leading H1 — title already in frontmatter), `shiftHeadings` (H1→H2, H2→H3, …), `titleCaseHeadings` (acronym-aware Title Case for in-page headings and TOC; normalises ALL CAPS words to Title Case while preserving known acronyms from the `knownAcronyms` map in `hugo.go` — ~30 domain terms; 
maintainers add entries as new projects introduce terminology), `stripBadges`, `rewriteRelativeLinks`, `rewriteDiagramBlocks` (converts fenced diagram code blocks — mermaid, plantuml, d2, graphviz/dot, ditaa, and other Kroki-supported languages — to `kroki {type=…}` format for server-side rendering via Doks' `render-codeblock-kroki.html` hook; `dot` normalised to `graphviz`; routes mermaid through Kroki rather than Doks' client-side `render-codeblock-mermaid.html` to uphold Constitution V) | | IS-009 | Repo filtering: `--include`/`--exclude` lists (peribolos is the governance gate; no API metadata filtering) | | IS-012 | Sync manifest (`.sync-manifest.json`) for orphan file tracking | | IS-014 | Doc page auto-sync from `discovery.scan_paths` directories | @@ -215,10 +215,11 @@ All criteria must pass before feature 006 merges to `main`. | SC-014 | `--lock` gates content to approved SHAs; unapproved repos are skipped | `lock_test.go`, `sync_test.go` (`TestProcessRepo_LockedSHA`) | | SC-015 | `--update-lock` writes current upstream SHAs to lockfile | `lock_test.go` (`TestWriteLock`, `TestWriteLock_DeterministicOrder`) | | SC-016 | Weekly check workflow creates/updates a PR with lockfile changes | `sync-content-check.yml` manual dispatch | +| SC-017 | Diagram code blocks in upstream content are rewritten to Kroki format and render server-side (not via client-side JS) | `TestRewriteDiagramBlocks` (12 subtests), `sync.go` pipeline integration (3 call sites) | ## Merge Readiness Gate -All 16 success criteria (SC-001 through SC-016) MUST pass before merging feature 006 to `main`. SC-006 is deferred (blocked on config sources being declared) but its code paths are covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`). SC-016 requires a manual `workflow_dispatch` run of `sync-content-check.yml` after merge. +All 17 success criteria (SC-001 through SC-017) MUST pass before merging feature 006 to `main`. 
SC-006 is deferred (blocked on config sources being declared) but its code paths are covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`). SC-016 requires a manual `workflow_dispatch` run of `sync-content-check.yml` after merge. ## Appendix: Legacy ID Cross-Reference diff --git a/specs/006-go-sync-tool/tasks.md b/specs/006-go-sync-tool/tasks.md new file mode 100644 index 0000000..b34823d --- /dev/null +++ b/specs/006-go-sync-tool/tasks.md @@ -0,0 +1,249 @@ +# Tasks: Go Content Sync Tool + +**Input**: Design documents from `/specs/006-go-sync-tool/` +**Prerequisites**: plan.md (required), spec.md (required), research.md + +**Tests**: Unit tests are required per SC-008. Included in Phase 7 (US6). Hardening tests added in Phase 8 based on code audit findings. + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing of each story. The core sync tool is already ported and functional (IS-001 through IS-005 Done). Remaining work is infrastructure integration, CI/CD, tests, and hardening. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2, US3) +- **[DEFERRED]**: Blocked on an external precondition; not executable yet +- Include exact file paths in descriptions + +**ID Gaps**: T002 was consolidated into T001 during initial planning. T011 and T012 were merged into the remediation phase (T022, T023) when cross-artifact analysis revealed they overlapped. IDs are not renumbered to preserve external references. + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Gitignore patterns and section scaffolding that all user stories depend on + +- [x] T001 Update `.gitignore` with exclusion patterns for generated content: add `content/docs/projects/*/` (generated repo pages) and `data/projects.json` (landing page cards). 
*(Done — `.gitignore` already contains correct glob patterns that inherently preserve hand-maintained `_index.md` files. Validates NFR-007.)* + +**Checkpoint**: `.gitignore` prevents generated content from being committed. Hugo recognizes `content/docs/projects/` as a content section. + +--- + +## Phase 2: User Story 1 — Safe Local Preview (Priority: P1) 🎯 MVP + +**Goal**: A contributor clones the repo, runs the sync tool, and previews the full site locally. Dry-run is the default; `--write` is required for disk I/O. + +**Independent Test**: Run `go run ./cmd/sync-content --org complytime --config sync-config.yaml` and verify zero files are created. Then run with `--write` and verify content appears. Then run `hugo server` and verify zero build errors. + +- [x] T003 [US1] Validate dry-run mode: run `go run ./cmd/sync-content --org complytime --config sync-config.yaml` and confirm zero files created in `content/docs/projects/` or `data/`. Tool should log intended actions without writing. *(Done — dry-run exits 0 with "dry run complete, no files were written". Logs intended syncs for 4 eligible repos.)* +- [x] T004 [US1] Validate write mode: run `go run ./cmd/sync-content --org complytime --config sync-config.yaml --write` and confirm: (a) section indexes appear at `content/docs/projects/{repo}/_index.md` (frontmatter only, no body), (b) overview pages appear at `content/docs/projects/{repo}/overview.md` (README content), (c) doc sub-pages appear for repos with `docs/` directories matching `discovery.scan_paths`, (d) `data/projects.json` is written, (e) `.sync-manifest.json` is written. Verify sync duration stays under 60s with token (NFR-001). *(Done — all 5 output artifacts verified. 54 files tracked in manifest. 4 repos with section index + overview + doc sub-pages. Sync completes well within the 60s NFR-001 target with authenticated token.)* +- [x] T005 [US1] Validate Hugo build: run `npm run dev` (or `hugo server`) after sync and confirm zero build errors. 
Verify project pages are accessible at `/docs/projects/`. *(Done — `hugo --minify --gc` succeeds with 95 pages in 1072ms. Project pages built at `/docs/projects/`.)* + +**Checkpoint**: Full local development workflow works end-to-end. Contributors can safely preview the site. + +--- + +## Phase 3: User Story 2 — Org-Wide Auto-Discovery (Priority: P1) + +**Goal**: New repos in the complytime org automatically get project pages and landing page cards without config changes. + +**Independent Test**: After running sync with `--write`, verify repos NOT in `sync-config.yaml` (e.g., `complytime-demos`, `gemara-content-service`) have generated pages and appear in `data/projects.json`. + +- [x] T006 [P] [US2] Verify auto-discovered repos produce pages: check that repos not declared in `sync-config.yaml` have: (a) `content/docs/projects/{repo}/_index.md` with metadata frontmatter (`title`, `description`, `params.language`, `params.stars`, `params.source_sha`, `params.readme_sha`) and no body content, (b) `content/docs/projects/{repo}/overview.md` with README content (headings shifted, badges stripped, relative links rewritten). *(Done — all 4 eligible repos (complyctl, complyscribe, complytime-collector-components, gemara-content-service) have both files with correct frontmatter.)* +- [x] T007 [P] [US2] Verify `data/projects.json` completeness: confirm file contains a `ProjectCard` entry for every eligible peribolos repo, sorted alphabetically, with fields `name`, `language`, `type`, `description`, `url`, `repo`, `stars`. *(Done — 4 cards sorted alphabetically with all required fields.)* + +**Checkpoint**: Zero-config discovery works. All eligible org repos are visible on the site. + +--- + +## Phase 4: User Story 3 — Config-Driven Precision Sync (Priority: P1) + +**Goal**: Config overlay provides precise control over file sync destinations, frontmatter, and transforms for key projects (complyctl, complyscribe, collector-components) without breaking the org scan baseline. 
+ +**Independent Test**: Verify config-declared files appear at their `dest` paths in `sync-config.yaml` with injected frontmatter and applied transforms. + +- [ ] T008 [P] [US3] [DEFERRED] Verify `skip_org_sync` behavior: for repos with `skip_org_sync: true` in `sync-config.yaml`, confirm no auto-generated section index (`_index.md`) or overview page (`overview.md`) exists at `content/docs/projects/{repo}/` BUT the repo's `ProjectCard` is present in `data/projects.json`. *Blocked — `sync-config.yaml` currently has `sources: []`. Unblocked when sources are declared for specific repos. Code paths covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`).* +- [ ] T009 [P] [US3] [DEFERRED] Verify config file transforms: check config-declared files at their `dest` paths have correct `inject_frontmatter` (YAML frontmatter with title, description, weight), `rewrite_links` (relative links converted to absolute GitHub URLs), `strip_badges` (CI badge lines removed), and `rewrite_diagrams` (diagram code blocks converted to Kroki format) per `sync-config.yaml` transform declarations. *Blocked — `sync-config.yaml` currently has `sources: []`. Unblocked when sources are declared. Code paths covered by unit tests (`TestInjectFrontmatter`, `TestStripBadges`, `TestRewriteRelativeLinks`, `TestRewriteDiagramBlocks`).* + +**Checkpoint**: Hybrid mode works — org scan provides the baseline, config overlay provides precision. + +--- + +## Phase 5: Verify Inherited — Change Detection and Stale Cleanup (Priority: P2) + +**Goal**: Verify the inherited change detection and stale cleanup capabilities work correctly in the complytime-website context. No implementation needed — these are built into the ported tool (see spec "Inherited Capabilities"). + +**Independent Test**: Run the sync tool twice. On the second run, verify unchanged repos are logged as "unchanged". Simulate removal and verify cleanup removes all generated files (section index, overview, doc sub-pages). 
+ +- [x] T010 [US4] Verify change detection and stale cleanup: run sync with `--write` twice in succession. On the second run, confirm unchanged repos show "unchanged" in structured log output (no disk writes). Verify `syncResult` counters report correct `added`/`updated`/`unchanged` counts. Additionally, verify stale cleanup removes all generated files for a removed repo — not just `_index.md` but also `overview.md` and any doc sub-pages under the repo directory. *(Done — manifest-based tracking verified (54 entries). Stale cleanup uses manifest-based `cleanOrphanedFiles` (T030). Change detection tested by `TestProcessRepo_BranchUnchanged` and `TestProcessRepo_BranchChangedReadmeUnchanged`.)* + +**Checkpoint**: Change detection prevents redundant writes. Stale cleanup keeps the site clean. + +--- + +## Phase 6: User Story 5 — CI/CD Pipeline Integration (Priority: P2) + +**Goal**: The sync tool runs in GitHub Actions. The deploy workflow generates fresh content before Hugo builds. A CI workflow validates PRs with lint, test, dry-run, and build. + +**Independent Test**: Verify deploy workflow includes sync step. Verify CI workflow runs `go vet`, `gofmt` check, `go test -race`, sync dry-run, and Hugo build. + +- [x] T013 [US5] Update deploy workflow in `.github/workflows/deploy-gh-pages.yml`: add `actions/setup-go` step (Go 1.25+), add sync step (`go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write`) before Hugo build, pass `GITHUB_TOKEN` to sync step for authenticated API access. Trigger on push to `main` and `workflow_dispatch` per Constitution XV (v1.3.0). Preserve existing Node.js setup, Hugo setup, and GitHub Pages deploy steps. *(Done — `deploy-gh-pages.yml` has all required steps. Daily cron removed in T043 in favour of PR-gated content sync.)* +- [x] T014 [P] [US5] Create CI workflow for PR validation in `.github/workflows/ci.yml`: trigger on `pull_request` to `main`. 
Steps: checkout, setup Go, setup Node.js, setup Hugo, `npm ci`, `go test -race ./cmd/sync-content/...`, content sync with `--lock` (`go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write`), Hugo build (`hugo --minify --gc`). `go vet` and `gofmt` checks run in `deploy-gh-pages.yml` instead. *(Done — `ci.yml` created with all required steps and SHA-pinned actions.)* + +**Checkpoint**: Production deploys always use fresh synced content. PRs are validated with tests, a locked content sync, and a Hugo build; the `go vet` and `gofmt` lint checks run in the deploy workflow. + +--- + +## Phase 7: User Story 6 — Concurrent Processing with Race Safety (Priority: P3) + +**Goal**: Unit tests verify core sync functions and concurrent processing passes race detection. + +**Independent Test**: `go test -race ./cmd/sync-content/...` passes with zero data race warnings. + +- [x] T015 [US6] Write unit tests for pure functions in `cmd/sync-content/*_test.go`: test `loadConfig`, `injectFrontmatter`, `stripBadges`, `shiftHeadings`, `rewriteRelativeLinks`, `isValidRepoName`. SPDX license header present. See research R6. *(Done — 22 test functions at this phase; 57 total across 10 `*_test.go` files after T037 hardening, T045–T046 lockfile additions, T053 peribolos tests, T054 discovery removal, helpers_test.go shared utilities, and Phase 13 heading/casing transforms.)* +- [x] T016 [US6] Write integration tests with httptest mock in `cmd/sync-content/*_test.go`: mock GitHub API responses for org listing and README fetch. Test `processRepo` end-to-end, `syncConfigSource`, manifest round-trip, orphan cleanup. See research R6. *(Done — `TestProcessRepo`, `TestSyncConfigSource`, `TestManifestRoundTrip`, `TestCleanOrphanedFiles`, etc.)* +- [x] T017 [US6] Run `go test -race ./cmd/sync-content/...` and verify zero data race warnings. Fix any races found in `syncResult` mutex usage or `cards` slice access. *(Done — all tests pass with `-race` flag, zero data race warnings.)* + +**Checkpoint**: Core functions have test coverage.
Concurrent processing is race-free. + +--- + +## Phase 8: Hardening (Security, Defensive Coding, Code Quality) + +**Purpose**: Address security vulnerabilities, logical bugs, and code quality issues identified by code audit of `cmd/sync-content/`. These findings were not covered by the original spec, plan, or existing tasks — the existing task set focused on happy-path verification while these address adversarial inputs, edge-case correctness, and defensive coding. + +**Audit Reference**: Findings cross-referenced against spec (Inherited Capabilities, Success Criteria), plan (Constitution Check), and existing tasks. Tier 1 tasks (T028–T030) map to spec security requirements (SEC-001) and success criteria (SC-011–SC-013). Tier 2–3 tasks (T031–T036) are code quality improvements justified by the Audit Findings Traceability table below — they do not require formal spec requirements. + +### Tier 1: Security & Correctness (should block merge) + +- [x] T028 Add path traversal guard in `cmd/sync-content/`: create an `isUnderDir(base, target string) bool` validation function that resolves both paths via `filepath.Abs` + `filepath.Clean` and confirms `target` is under `base`. Apply to all disk-write call sites: `syncConfigSource` (config `dest` field), `syncRepoDocPages` (API-sourced file paths), `processRepo` (section index and overview paths). Return an error and increment `result.errors` for any path that escapes the `--output` directory. *(Done — `isUnderDir` applied to 4 write sites, tested by `TestIsUnderDir` and `TestPathTraversalRejection`.)* +- [x] T029 Add context-aware retry sleep in `cmd/sync-content/`: replace `time.Sleep(wait)` in `apiClient.getJSON` with a `select` on `ctx.Done()` and `time.After(wait)`. Return `ctx.Err()` if the context is cancelled during backoff. *(Done — `select { case <-ctx.Done(): ... 
case <-time.After(wait): }`, tested by `TestContextCancellationDuringRetry`.)* +- [x] T030 Ensure stale content cleanup removes all generated files in `cmd/sync-content/`: `cleanOrphanedFiles` uses per-file `os.Remove` with manifest diffing and empty-directory pruning — when all files for a removed repo are orphaned, the entire repo directory (including overview.md and doc sub-pages) is cleaned up and empty parent directories are pruned. *(Done — tested by `TestCleanOrphanedFiles`, `TestCleanOrphanedFiles_PrunesEmptyDirs`, and `TestCleanOrphanedFiles_LegitimateRemoval` in `cleanup_test.go`.)* + +### Tier 2: Defensive Coding (code quality) + +- [x] T031 [P] Bound error response body read in `cmd/sync-content/`: replace `io.ReadAll(resp.Body)` with `io.ReadAll(io.LimitReader(resp.Body, 4096))` in `apiClient.getJSON`. *(Done — 4KB limit applied.)* +- [x] T032 [P] Add URL path escaping in API methods in `cmd/sync-content/`: add `net/url` import and apply `url.PathEscape()` to org name, repo name, branch name, and file path components in all API methods. *(Done — `escapePathSegments` helper + `url.PathEscape` in `fetchPeribolosRepos`, `getRepoMetadata`, `getREADME`, `getFileContent`, `listDir`, `getBranchSHA`, tested by `TestEscapePathSegments`.)* +- [x] T033 [P] Always return ProjectCard from `processRepo` in `cmd/sync-content/`: modify both dry-run paths to return a `*repoWork` with a populated `ProjectCard` instead of `nil`. *(Done — both fast path and slow path return `buildProjectCard(repo)`, tested by `TestDryRunReturnsCard`.)* + +### Tier 3: Redundancy Removal + +- [x] T034 [P] Extract `buildProjectCard` helper in `cmd/sync-content/`: refactor duplicated `ProjectCard` struct literals into a `buildProjectCard(repo Repo) ProjectCard` function. *(Done — single function replaces 3 call sites, tested by `TestBuildProjectCard`.)* +- [x] T035 [P] Remove dead branch fallback in `syncConfigSource` in `cmd/sync-content/`: delete the dead `if branch == ""` check. 
*(Done — `syncConfigSource` now uses `src.Branch` directly; `loadConfig` guarantees it's populated.)* +- [x] T036 [P] Unify `--exclude` flag default with config `discovery.ignore_repos` in `cmd/sync-content/` and `sync-config.yaml`: move the 7-item hardcoded `--exclude` default into `discovery.ignore_repos` in `sync-config.yaml`. *(Done — `--exclude` default is now empty string; `main()` merges `--exclude` with `cfg.Discovery.IgnoreRepos`. 7 repos moved to `sync-config.yaml`.)* + +### Hardening Tests + +- [x] T037 Add hardening tests in `cmd/sync-content/*_test.go`: (a) `TestPathTraversalRejection`, (b) `TestContextCancellationDuringRetry`, (c) `TestCleanOrphanedFiles_LegitimateRemoval`, plus `TestIsUnderDir`, `TestBuildProjectCard`, `TestEscapePathSegments`, `TestDryRunReturnsCard`. *(Done — 7 new test functions added. All pass with `-race`.)* + +**Checkpoint**: All security vulnerabilities patched. Defensive coding prevents memory exhaustion and URL injection. Code quality improved with deduplication and dead code removal. Hardening tests verify all fixes. + +--- + +## Phase 9: Polish & Cross-Cutting Concerns + +**Purpose**: Documentation, code quality, and final validation across all stories + +- [x] T018 [P] Update `CONTRIBUTING.md` with sync tool developer workflow: prerequisites (Go 1.25+), running sync tool locally, CLI flags reference, running tests. *(Done — Prerequisites, Getting Started, Project Structure, Quick Reference, CI/CD, and PR Checklist sections all updated.)* +- [x] T019 [P] Run `go vet ./...` and confirm zero issues. Run `gofmt -l ./cmd/sync-content/` and confirm no unformatted files. *(Done — both pass with zero output.)* +- [x] T020 Validate end-to-end build pipeline: confirm `go mod verify`, `go build ./cmd/sync-content`, `go vet`, `go test -race`, and `hugo --minify --gc` all succeed. Covers SC-001 and SC-002. 
*(Done — `go mod verify` → "all modules verified", `go build` succeeds, `go vet` clean, `go test -race` passes, `hugo --minify --gc` succeeds.)* +- [x] T021 Final sweep of implementation status table in `specs/006-go-sync-tool/spec.md`: verify all Done items have evidence notes (commit hash, test name, or task ID), update any newly completed items from Pending to Done, and add a completion date entry to the changelog. *(Done — implementation status items updated to Done with test/task evidence. Changelog entry added. See spec Appendix: Legacy ID Cross-Reference for IS-060–IS-065 mapping.)* + +--- + +## Phase 10: Remediation (Cross-Artifact Consistency Fixes) + +**Purpose**: Address gaps identified by cross-artifact analysis. These tasks ensure the spec, plan, and implementation are internally consistent. + +- [x] T022 [US2] Rewrite `layouts/home.html` Projects section to read `data/projects.json`: Hugo `range` over `site.Data.projects` with `$langColors` dict mapping. Responsive card grid preserved. *(Done — `home.html` uses dynamic project cards from `data/projects.json`. Validates IS-040.)* +- [x] T023 [P] Remove hand-maintained committed project docs from git tracking: run `git rm --cached` for any project documentation files under `content/docs/projects/` that are now generated by the sync tool. Validates Constitution III (Single Source of Truth). *(Done — all 16 project docs files were deleted in commit `7ff850a` ("CPLYTM-1291 sync tool"). Files recoverable from `main` branch if needed.)* +- [x] T024 [P] [US5] Verify CI integration outputs: after T013, run the sync tool with `GITHUB_OUTPUT` and `GITHUB_STEP_SUMMARY` set to temp files. Confirm `GITHUB_OUTPUT` contains `has_changes=true|false`, `changed_count=N`, `error_count=N`. Confirm `GITHUB_STEP_SUMMARY` contains a markdown sync summary. Test `--summary report.md` flag. Validates IS-018 and the CI integration capabilities (GITHUB_OUTPUT variables, step summary, and summary file). 
*(Done — `writeGitHubOutputs` and `syncResult.toMarkdown` exercised by integration-level runs; `toMarkdown()` has no dedicated unit test. Live verification deferred to CI with GITHUB_TOKEN.)* +- [x] T025 [P] [US2] Verify landing page renders dynamic project cards: run sync with `--write` and start `hugo server`. Confirm the landing page "Our Projects" section displays cards generated from `data/projects.json`. Confirm new repos added to the org would appear automatically. Validates IS-040. *(Done — Hugo build output `public/index.html` contains project references from `data/projects.json`.)* +- [x] T026a [P] Add Hugo cascade block to `content/docs/projects/_index.md`: push `sidebar.collapsed: true` to repo-level section pages via `_target: {kind: section, path: "{/docs/projects/*}"}`. Doks template reads `.Params.sidebar.collapsed` natively. No sync tool or template changes needed. See research R3a. *(Done — cascade block added to `content/docs/projects/_index.md` frontmatter. Validates IS-041 implementation.)* +- [x] T026b [P] Verify docs sidebar shows synced project pages with collapsed sections: after sync with `--write`, run `hugo --minify --gc` and confirm (a) `public/docs/projects/` contains per-repo directories with `index.html`, (b) cascade block in `content/docs/projects/_index.md` sets `sidebar.collapsed: true` for repo-level sections (verified by frontmatter inspection), (c) Hugo build produces zero errors. Visual confirmation via `hugo server` that repo-level sections are collapsed by default and sub-folders remain expanded. Validates IS-041. *(Done — Hugo build: 95 pages, 969ms, zero errors. `public/docs/projects/complyctl/index.html` confirmed. Cascade block verified in `content/docs/projects/_index.md` frontmatter.)* +- [x] T027 Sync constitution memory file with live constitution: `.specify/memory/constitution.md` updated to match `.specify/constitution.md` v1.5.0. 
Transitional provisions removed, Principles III and IV updated for governance-driven discovery. Validates Constitution III and IV (v1.5.0). *(Done — memory file synced to v1.5.0.)* + +**Checkpoint**: All cross-artifact inconsistencies resolved. T026a (cascade block) applied. T026b (sidebar visual verification) done. + +--- + +## Phase 11: Content Approval Gate (US7) + +**Purpose**: Implement a Dependabot-style content approval gate so upstream documentation changes require human review before reaching production. Replaces the daily cron deploy model with a lockfile + PR workflow. + +**Ref**: IS-070, SC-014–SC-016, Constitution XV (v1.3.0) + +- [x] T038 [US7] Create `cmd/sync-content/lock.go`: `ContentLock` struct (`repos` map[string]string), `readLock(path)`, `writeLock(path, lock)` with deterministic JSON output (sorted keys, indented), `sha(repo)` helper. SPDX header. *(Done — `lock.go` created, tested by `lock_test.go`.)* +- [x] T039 [US7] Add `ref` parameter to GitHub API methods in `cmd/sync-content/github.go`: create `appendRef(apiURL, ref string) string` helper. Thread `ref string` through `getREADME`, `getFileContent`, `listDir`, `listDirMD`, `listDirMDDepth`. When `ref` is empty, no query parameter is added (preserves existing behavior). *(Done — all 5 API methods updated, `appendRef` tested by `TestAppendRef`.)* +- [x] T040 [US7] Add `--lock` and `--update-lock` CLI flags to `cmd/sync-content/main.go`: load lockfile on startup, gate repos not in lockfile when `--lock` is active (skip with log), thread `lockedSHA` through `processRepo`, collect upstream SHAs via `sync.Map`, write updated lockfile when `--update-lock` is set. *(Done — flags integrated, tested end-to-end.)* +- [x] T041 [US7] Thread `lockedSHA`/`ref` through sync functions in `cmd/sync-content/sync.go`: `processRepo` accepts `lockedSHA string`, derives `fetchRef` when locked SHA differs from upstream. `syncConfigSource` and `syncRepoDocPages` accept `ref string` and pass to API methods. 
*(Done — all callers updated.)* +- [x] T042 [US7] Create `.github/workflows/sync-content-check.yml`: weekly cron (Monday 06:00 UTC) + `workflow_dispatch`. Runs `--update-lock --summary sync-summary.md`. Creates/updates PR via `peter-evans/create-pull-request` with `add-paths: .content-lock.json`. Labels: `automated`, `documentation`. *(Done — workflow created. Originally included `--discover` step; simplified in T054 after discovery mode was removed.)* +- [x] T043 [US7] Update `.github/workflows/deploy-gh-pages.yml`: remove `schedule` cron trigger, add `--lock .content-lock.json` to sync step. Deployments now only occur on push to `main` (after content sync PR merge) or manual `workflow_dispatch`. *(Done — daily cron removed, `--lock` added.)* +- [x] T044 [US7] Update `.github/workflows/ci.yml`: add `--lock .content-lock.json` to dry-run sync step so CI validates lockfile parsability. *(Done — `--lock` flag added to dry-run step.)* +- [x] T045 [US7] Create `cmd/sync-content/lock_test.go`: tests for `readLock`, `writeLock`, `sha`, missing file handling, invalid JSON, deterministic write order. *(Done — 6 test functions: `TestReadWriteLock_RoundTrip`, `TestReadLock_MissingFile`, `TestReadLock_InvalidJSON`, `TestContentLock_SHA`, `TestWriteLock_DeterministicOrder`, `TestReadLock_NilReposInitialized`.)* +- [x] T046 [US7] Update `cmd/sync-content/github_test.go` and `sync_test.go`: add `ref` parameter to all existing API method calls (empty string for unchanged behavior). Add `TestAppendRef`, `TestGetREADME_WithRef`, `TestListDirMD_WithRef`, `TestProcessRepo_LockedSHA`, `TestProcessRepo_LockedSHA_MatchesUpstream`. *(Done — all tests updated and passing with `-race`.)* +- [x] T047 [US7] Bootstrap `.content-lock.json` at project root with `{"repos": {}}` so deploy workflow has a valid starting file. *(Done — file created.)* + +**Checkpoint**: Upstream content changes require human-reviewed PRs. Deploy workflow fetches only approved content. Weekly check detects drift. 
Constitution XV (v1.3.0) satisfied. + +--- + +## Phase 12: Governance-Driven Discovery (IS-001, Constitution v1.5.0) + +**Purpose**: Replace ad-hoc GitHub API org listing with governance registry (`peribolos.yaml`) as the authoritative source of eligible repositories. Per-repo metadata (stars, language, topics) is still fetched from the GitHub API. + +**Ref**: IS-001 (updated for peribolos), US2, Constitution IV (v1.5.0), Constitution III (v1.5.0) + +- [x] T048 [US2] Add `fetchPeribolosRepos` function in `cmd/sync-content/github.go`: fetch `peribolos.yaml` from `{org}/.github` repo via `GET /repos/{org}/.github/contents/peribolos.yaml`, base64-decode, parse YAML, extract repo names from `orgs.{org}.repos` map. Return `[]string` of repo names. Log fatal and exit non-zero if `.github` repo or file is missing. *(Done — `fetchPeribolosRepos` added with sorted output, tested by `TestFetchPeribolosRepos`.)* +- [x] T049 [P] [US2] Add `PeribolosConfig` types in `cmd/sync-content/config.go`: `PeribolosConfig` struct with `Orgs map[string]PeribolosOrg`, `PeribolosOrg` with `Repos map[string]PeribolosRepo`, `PeribolosRepo` with `Description string` and `DefaultBranch string` fields. Parsed by `gopkg.in/yaml.v3` (no new dependency). *(Done — types added to `config.go`.)* +- [x] T050 [US2] Replace `listOrgRepos` call in `cmd/sync-content/main.go`: replaced with `fetchPeribolosRepos` to get repo names, then `getRepoMetadata` per repo. `listOrgRepos` and `pageSize` constant removed as dead code. *(Done — single unified code path through peribolos for all modes.)* +- [x] T051 [P] [US2] Add `getRepoMetadata` function in `cmd/sync-content/github.go`: fetch `GET /repos/{owner}/{name}` and decode into `Repo` struct. *(Done — tested by `TestGetRepoMetadata`.)* +- [x] T052 [US2] Tighten `--repo` flag: `--repo` now validates the target against `peribolosSet` and rejects repos not in the governance registry. No peribolos bypass. Config-only sources also validated against peribolos. 
*(Done — strict governance gate on all entry points.)* +- [x] T053 [US2] Add peribolos tests in `cmd/sync-content/github_test.go`: mock `peribolos.yaml` fetch, test parsing (success, missing org, 404). Test `getRepoMetadata`. Discovery test updated to mock peribolos. *(Done — `TestFetchPeribolosRepos` (3 subtests), `TestGetRepoMetadata`.)* +- [x] T054 [US2] Remove `discovery.go` and `--discover` flag: discovery mode was redundant after governance-driven repo listing — all repos from peribolos are auto-synced by the main path, and doc pages are scanned via `syncRepoDocPages` using `scan_paths`. `discovery_test.go` also removed. `sync-content-check.yml` workflow simplified (discover step removed). *(Done — 10 source files, 10 test files remain.)* + +**Checkpoint**: Repo listing is governance-driven. Peribolos is the single source of truth for which repos exist — all code paths (main sync, `--repo`, config sources) are gated. Per-repo metadata comes from the API. Discovery mode removed as redundant. All edge cases (missing peribolos, deleted repos, single-repo validation) are handled and tested. + +--- + +## Phase 13: Content Transform Improvements (Heading Casing & Normalisation) + +**Purpose**: Ensure uniform heading casing, remove duplicate leading H1s, shift heading levels for Hugo ToC correctness, and add a Hugo render hook for anchor links. These transforms guarantee consistent presentation across content synced from multiple upstream repos with varying conventions (lowercase headings, ALL CAPS filenames, duplicate H1 titles). + +**Ref**: IS-003 (page generation with `formatRepoTitle`, `linkTitle`), IS-008 (updated transforms), IS-042 (render heading hook) + +- [x] T055 [P] Add `shiftHeadings` transform in `cmd/sync-content/transform.go`: regex-based heading level bump (H1→H2, H2→H3, …) so Hugo's page title is the sole H1. Applied unconditionally to all synced content in `processRepo`, `syncConfigSource`, and `syncRepoDocPages`. 
Tested by `TestShiftHeadings` (6 subtests) in `transform_test.go`. *(Done — `headingRe` regex + `shiftHeadings` function.)* +- [x] T056 [P] Add `titleCaseHeadings` transform in `cmd/sync-content/transform.go`: applies `smartTitle` (acronym-aware Title Case) to all in-page Markdown heading text. This transform runs in Go (not Hugo) because Hugo's `{{ .TableOfContents }}` is built from raw Markdown — a render hook would only change HTML, leaving ToC entries inconsistent. Tested by `TestTitleCaseHeadings` (8 subtests) in `transform_test.go`. *(Done — `headingFullRe` regex + `titleCaseHeadings` function. ToC consistency verified: heading text in Markdown matches ToC output.)* +- [x] T057 [P] Add `stripLeadingH1` transform in `cmd/sync-content/transform.go`: removes the first H1 (`# `) from content body since the title is already in frontmatter. Prevents duplicate page titles. Tested by `TestStripLeadingH1` (5 subtests) in `transform_test.go`. *(Done — `strings.SplitN`/`strings.HasPrefix` approach for precise detection.)* +- [x] T058 [P] Add `knownAcronyms` map and `smartTitle` function in `cmd/sync-content/hugo.go`: ~30-entry map of canonical acronyms (API, OSCAL, CLI, OAuth, UUID, etc.). `smartTitle` capitalises first letter, lowercases rest, and preserves acronyms. Used by `formatRepoTitle`, `titleFromFilename`, and `titleCaseHeadings`. Tested by `TestSmartTitle` (8 subtests) in `hugo_test.go`. *(Done — includes ALL CAPS normalisation: `CONTRIBUTING` → `Contributing`.)* +- [x] T059 [P] Add `formatRepoTitle` function in `cmd/sync-content/hugo.go`: converts repo names (e.g. `oscal-sdk` → `OSCAL SDK`) using `smartTitle` for `title` frontmatter in `buildSectionIndex`. Raw repo name set as `linkTitle` for sidebar label. Tested by `TestFormatRepoTitle` in `hugo_test.go`. 
*(Done — `buildSectionIndex` uses `formatRepoTitle` for title, `repo.Name` for linkTitle.)* +- [x] T060 Create Hugo render heading hook at `layouts/_default/_markup/render-heading.html`: adds anchor `id`, clickable `#` link, and `heading` CSS class to all headings site-wide. Overrides Doks `headlineHash` partial. Validates IS-042. *(Done — committed layout override.)* +- [x] T061 Integrate transforms into sync pipeline in `cmd/sync-content/sync.go`: add `stripLeadingH1`, `shiftHeadings`, `titleCaseHeadings` calls to `processRepo` (overview page), `syncConfigSource`, and `syncRepoDocPages`. Transform order: strip leading H1 → shift headings → title-case headings. Update existing integration test assertions in `sync_test.go` (`TestProcessRepo`, `TestSyncConfigSource`, `TestSyncRepoDocPages`). *(Done — all 3 call sites updated, all integration tests pass.)* +- [x] T062 Update `cmd/sync-content/README.md` and `specs/006-go-sync-tool/spec.md`: document all new transforms, ALL CAPS normalisation, and render heading hook. Update test coverage table. *(Done — README and spec updated.)* + +**Checkpoint**: All synced content has uniform Title Case headings, no duplicate leading H1s, and correct heading levels for ToC. ALL CAPS filenames (`CONTRIBUTING.md`) produce `Contributing` in sidebar and page titles. Hugo render hook provides anchor links site-wide. All transforms tested (57 test functions across 10 test files). + +--- + +## Phase 14: Diagram Block Rewriting (Kroki Integration) + +**Purpose**: Convert upstream diagram code blocks (mermaid, plantuml, d2, graphviz/dot, ditaa, and other Kroki-supported languages) to `kroki {type=…}` format so they render server-side via Doks' `render-codeblock-kroki.html` hook. This avoids client-side JavaScript diagram rendering (Constitution V) and ensures consistent rendering across all upstream repos regardless of their diagram conventions. 
+ +**Ref**: IS-005 (updated: `rewrite_diagrams` config transform), IS-008 (updated: `rewriteDiagramBlocks`), SC-017, Constitution V + +- [x] T063 [P] Add `rewriteDiagramBlocks` transform in `cmd/sync-content/transform.go`: regex-based code fence rewrite converting `` ```mermaid ``, `` ```plantuml ``, `` ```d2 ``, `` ```graphviz ``, `` ```dot `` (normalised to `graphviz`), `` ```ditaa ``, and other Kroki-supported languages to `` ```kroki {type=…} `` format. Add `RewriteDiagrams bool` field to `Transform` struct in `config.go`. Tested by `TestRewriteDiagramBlocks` (12 subtests) in `transform_test.go`. *(Done — `diagramBlockRe` regex matches 19 fence info strings covering 18 Kroki diagram types. `dot` normalised to `graphviz` for Kroki compatibility.)* +- [x] T064 Integrate `rewriteDiagramBlocks` into sync pipeline in `cmd/sync-content/sync.go`: applied unconditionally in `processRepo` (overview pages) and `syncRepoDocPages` (doc sub-pages); conditionally in `syncConfigSource` (gated by `file.Transform.RewriteDiagrams`). *(Done — 3 call sites updated.)* +- [x] T065 Update spec and plan with diagram rewrite documentation: add `rewrite_diagrams` to IS-005, `rewriteDiagramBlocks` to IS-008, update Overview and Dependencies, add SC-017, update plan summary and constitution check table, add Phase 14 tasks. *(Done — this remediation.)* + +**Checkpoint**: Upstream diagram code blocks render server-side via Kroki. No client-side JS required (Constitution V). Config overlay supports `rewrite_diagrams` for precision control. 18 supported Kroki diagram types (19 recognised fence info strings, with `dot` aliased to `graphviz`). + +--- + +## Appendix: Implicit Coverage Note + +> Tasks T003 (dry-run) and T004 (write mode) implicitly exercise the `--timeout`, `--workers` flags, the `maxRetries` constant, and byte-level dedup at their default values. Dedicated isolated tests for these parameters are covered by US6 unit tests (T015, T016) and the race detector run (T017). Hardening phase (Phase 8) covers adversarial and defensive scenarios not reached by happy-path verification.
+> +> **NFR verification approach**: NFR-001 (sync < 60s) is verified by T004 timing observation. NFR-002 (Hugo build < 2s) is verified by T005 (1072ms achieved). NFR-003 (structured logging via `log/slog`) is enforced by constitution principle XI and verified by code review — no `fmt.Println` or `log.Printf` exists in source files. NFR-005 (single `package main`) is enforced by constitution principle XIV and verified by the `go build` step (SC-002). NFR-006 (single third-party dep) is verified by `go.mod` inspection (SC-001). These NFRs have no dedicated tasks because they are architectural invariants verified by constitution checks rather than behavioral requirements. +> +> IS-016 (single-repo mode via `--repo`) has no dedicated verification task. The flag is functional and exercised by unit tests in `sync_test.go` (`TestParseNameList_RepoFilterOverridesExclude`). It was ported from the reference implementation and is a convenience shortcut for `--include` with a single repo — no separate integration-level task was needed. +> +> IS-017 (summary file generation via `--summary`) verification is included in T024 rather than a standalone task. The `toMarkdown()` method that generates summary content has no dedicated unit test — its output is exercised by integration-level CI runs. 
+ + From 3b1848e3fa7e98d7da14ce237b64aeff3340aa3e Mon Sep 17 00:00:00 2001 From: Sonu Preetam Date: Thu, 26 Mar 2026 07:56:41 -0400 Subject: [PATCH 3/5] feat(cli): skip index.md to prevent Hugo leaf bundle conflict Signed-off-by: Sonu Preetam --- CONTRIBUTING.md | 110 ++++++++++----- README.md | 128 ++---------------- cmd/sync-content/README.md | 243 +++------------------------------- cmd/sync-content/sync.go | 10 ++ cmd/sync-content/sync_test.go | 71 ++++++++++ package-lock.json | 27 ++-- 6 files changed, 203 insertions(+), 386 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a6f7694..161c58a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -27,6 +27,7 @@ oriented quickly. - [CI/CD and Deployment](#cicd-and-deployment) - [Coding Conventions](#coding-conventions) - [Pull Request Process](#pull-request-process) +- [Development Workflow](#development-workflow) - [Troubleshooting](#troubleshooting) --- @@ -88,25 +89,6 @@ watches for file changes and rebuilds automatically. > GitHub org. Set the `GITHUB_TOKEN` environment variable for higher API rate > limits. Without it, unauthenticated requests are limited to 60/hour. -### Other Useful Commands - -```bash -# Production build (output → public/) -npm run build - -# Preview the production build -npm run preview - -# Format files with Prettier -npm run format - -# Sync content in dry-run mode (preview without writing) -go run ./cmd/sync-content --org complytime --config sync-config.yaml - -# Run Go tests -go test -race ./cmd/sync-content/... -``` - --- ## Project Structure @@ -221,7 +203,8 @@ manually. To add a new project: For repos needing custom file sync with transforms (e.g., specific doc pages with injected frontmatter), add a source entry in `sync-config.yaml`. See -`specs/006-go-sync-tool/quickstart.md` for details. +[cmd/sync-content/README.md](cmd/sync-content/README.md#configuration) for the +config format. 
### Change Navigation Menus @@ -261,7 +244,7 @@ remove a feature card, look for the `` comment in Edit `assets/scss/common/_variables-custom.scss`: ```scss -// Brand colors — change these to re-theme the site +// Brand colors — change these to re-theme the site $cyan-600: #0891b2; // Primary color $primary: $cyan-600; @@ -420,31 +403,92 @@ style: fix indentation in home template --- -## Troubleshooting +## Development Workflow -### `npm run dev` fails with Hugo errors +### Day-to-Day -Make sure you're on Node.js ≥ 20.11.0: +```bash +npm run dev # Dev server with live reload +npm run build # Production build (output → public/) +npm run preview # Preview the production build +npm run format # Format files with Prettier +``` + +### Clean Build (CI Match) + +`hugo server` and `hugo --minify --gc` can produce different results. Always +validate with a production build before trusting the dev server: ```bash -node --version +rm -rf public/ resources/ +npm run build ``` -If Hugo isn't found, it's installed as part of the npm dependencies. Try: +### Full Nuclear Clean + +When something looks wrong and a clean build isn't enough — removes cached +Hugo resources, node_modules, and reinstalls from the lockfile: ```bash -rm -rf node_modules -npm install -npm run dev +rm -rf public/ resources/ /tmp/hugo_cache/ node_modules/ +npm ci +npm run build ``` -### Changes not showing up in the browser +### Testing the Sync Tool + +Dry-run (preview without writing): + +```bash +go run ./cmd/sync-content --org complytime --config sync-config.yaml +``` + +Full reset and resync (useful when upstream content or sync logic changes): + +```bash +rm -f .sync-manifest.json data/projects.json +rm -rf content/docs/projects/*/ +rm -rf public/ resources/ +go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write +npm run build +``` + +Run Go tests: -Hugo's dev server uses live reload. 
If it's not picking up changes: +```bash +go test -race ./cmd/sync-content/... +``` + +If you encounter missing token errors, verify your `GITHUB_TOKEN`: + +```bash +export GITHUB_TOKEN="$(gh auth token)" +echo "Token set, length: ${#GITHUB_TOKEN}, prefix: ${GITHUB_TOKEN:0:4}" +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write +``` + +### Testing Tips + +- Always test with **browser cache disabled** (DevTools → Network → + "Disable cache"). +- When in doubt, run a [clean build](#clean-build-ci-match) — the dev server + can mask issues that show up in production. + +--- + +## Troubleshooting + +### `npm run dev` fails with Hugo errors + +Make sure you're on Node.js ≥ 22. If Hugo isn't found, try a +[full nuclear clean](#full-nuclear-clean). + +### Changes not showing up in the browser 1. Check the terminal for build errors 2. Try stopping and restarting `npm run dev` -3. For SCSS changes, a hard refresh (Cmd+Shift+R / Ctrl+Shift+R) may be needed +3. Hard refresh (Cmd+Shift+R / Ctrl+Shift+R) for SCSS changes +4. Test with browser cache disabled (DevTools → Network → "Disable cache") ### Image processing errors @@ -456,7 +500,7 @@ handle most cases. If it doesn't: - Local SVGs: Place them in `assets/` or a page bundle - The error level is set to `ignore` in `params.toml`, so most issues are silently skipped - + ### Build output confusion: `public/` vs `docs/` - **`public/`** — Hugo's build output directory (generated, gitignored) diff --git a/README.md b/README.md index 696725a..97dca84 100644 --- a/README.md +++ b/README.md @@ -4,145 +4,45 @@ The official website for [ComplyTime](https://github.com/complytime) - Cloud Nat Built with [Hugo](https://gohugo.io/) and the [Doks](https://getdoks.org/) theme. 
-## 🚀 Quick Start - -### Prerequisites - -- [Node.js](https://nodejs.org/) v22 or later -- [npm](https://www.npmjs.com/) (included with Node.js) -- [Go](https://go.dev/) 1.25+ (for the content sync tool) -- (Recommended) `GITHUB_TOKEN` env var for higher API rate limits - -### Development +## Quick Start ```bash -# Install dependencies npm install - -# Sync project content from GitHub org (generates project pages and cards) go run ./cmd/sync-content --org complytime --config sync-config.yaml --write - -# Start development server npm run dev ``` The site will be available at `http://localhost:1313/`. -### Build - -```bash -# Build for production -npm run build -``` +**Prerequisites**: Node.js v22+, Go 1.25+, and (recommended) a `GITHUB_TOKEN` env var for higher API rate limits. See [CONTRIBUTING.md](CONTRIBUTING.md#prerequisites) for details. -The output will be in the `public/` directory. +**Production build**: `npm run build` (output in `public/`). -## 📁 Project Structure +## Project Structure ``` website/ -├── assets/ # SCSS, JavaScript, images -│ ├── js/ -│ │ └── custom.js -│ └── scss/ -│ └── common/ -│ ├── _custom.scss -│ └── _variables-custom.scss ├── cmd/sync-content/ # Go content sync tool (10 source files, package main) -├── config/ # Hugo configuration -│ ├── _default/ -│ │ ├── hugo.toml -│ │ ├── languages.toml -│ │ ├── params.toml -│ │ └── menus/ -│ │ └── menus.en.toml -│ ├── production/ # Production overrides -│ └── next/ # Alternative env overrides -├── content/ # Markdown content -│ ├── _index.md # Homepage -│ ├── docs/ # Documentation -│ │ ├── getting-started/ -│ │ └── projects/ # Project pages (generated by sync tool, gitignored) -│ └── privacy.md -├── layouts/ # Custom layouts -│ ├── home.html # Homepage layout (reads data/projects.json) -│ └── docs/ -│ └── list.html # Docs section listing layout -├── static/ # Static assets (favicons, icons) +├── config/_default/ # Hugo configuration (TOML) +├── content/docs/ # Markdown content (projects/ is 
generated by sync tool) +├── data/projects.json # Generated landing page cards (gitignored) +├── layouts/ # Custom Hugo layout overrides ├── sync-config.yaml # Declarative sync configuration ├── .content-lock.json # Approved upstream SHAs per repo (committed) -├── go.mod / go.sum # Go module and checksums -├── .github/ -│ └── workflows/ -│ ├── deploy-gh-pages.yml # Deploy (sync at locked SHAs + Hugo build) -│ ├── ci.yml # PR validation (lint, test, dry-run, build) -│ └── sync-content-check.yml # Weekly content check (opens PR) -└── package.json -``` - -## 📝 Content - -### Navigation - -| Menu Item | URL | Description | -|---------------|------------------------|----------------------------| -| Getting Started | `/docs/getting-started/` | Documentation landing page | -| Projects | `/docs/projects/` | ComplyTime project pages | -| Privacy Policy | `/privacy/` | Privacy policy | - -### Adding Documentation - -Create a new Markdown file in the appropriate directory under `content/docs/`: - -```markdown ---- -title: "Page Title" -description: "Page description" -lead: "Brief intro text" -date: 2024-01-01T00:00:00+00:00 -draft: false -weight: 100 -toc: true ---- - -Your content here... 
+└── .github/workflows/ # CI, deploy, weekly content check ``` -## 🎨 Customization - -### Styling - -Custom styles are in `assets/scss/common/`: -- `_variables-custom.scss` - Variables and theme colors -- `_custom.scss` - Additional custom styles - -### Configuration - -Site configuration is in `config/_default/`: -- `hugo.toml` - Hugo settings -- `languages.toml` - Language and footer settings -- `params.toml` - Theme parameters -- `menus/menus.en.toml` - Navigation menus - -## 🚢 Deployment - -The site uses three GitHub Actions workflows: - -- **`ci.yml`** — validates PRs with `go test -race`, content sync (with `--lock --write`), and Hugo build -- **`sync-content-check.yml`** — runs weekly to detect upstream doc changes and opens a PR to update `.content-lock.json` -- **`deploy-gh-pages.yml`** — on push to `main`, syncs content at approved SHAs, builds with Hugo, and deploys `public/` to GitHub Pages - -Upstream content changes require a reviewed PR before reaching production. +See [CONTRIBUTING.md](CONTRIBUTING.md#project-structure) for the full annotated tree. -## 🤝 Contributing +## Contributing -Contributions are welcome! Please see our [Contributing Guide](https://github.com/complytime/community). +Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, workflow, coding conventions, and PR process. -## 📄 License +## License This website is licensed under [Apache 2.0](LICENSE). 
-## 🔗 Links +## Links - [ComplyTime GitHub](https://github.com/complytime) - [Doks Theme](https://getdoks.org/) diff --git a/cmd/sync-content/README.md b/cmd/sync-content/README.md index 0e1c8d9..0a39850 100644 --- a/cmd/sync-content/README.md +++ b/cmd/sync-content/README.md @@ -55,104 +55,19 @@ Config sources can operate alongside or instead of the org scan per-repo: | `false` (default) | Generated from README | Synced as additional content | Yes | | `true` | Suppressed | Synced as primary content | Yes | -## Quick Start +## Quick Reference -### Prerequisites +For setup, prerequisites, and day-to-day commands, see +[CONTRIBUTING.md](../../CONTRIBUTING.md#development-workflow). For the full CLI +flag reference, see the [spec](../../specs/006-go-sync-tool/spec.md#cli-interface). -- **Go 1.25+** — the sync tool is pure Go with one dependency (`gopkg.in/yaml.v3`) -- **Node.js 22+** — for the Hugo/Doks theme build (`npm ci`) -- **Hugo extended** — the static site generator -- **`GITHUB_TOKEN`** (recommended) — unauthenticated rate limit is 60 requests/hour - -### 1. Dry-run (preview without writing) - -```bash -go run ./cmd/sync-content --org complytime --config sync-config.yaml -``` - -Logs every action the tool would take but creates zero files. This is the default -mode — you must explicitly opt in to writes. - -### 2. Write mode (generate content) - -```bash -go run ./cmd/sync-content --org complytime --config sync-config.yaml --write -``` - -Produces: - -| Output | Path | -|--------|------| -| Per-repo section index | `content/docs/projects/{repo}/_index.md` | -| Per-repo README page | `content/docs/projects/{repo}/overview.md` | -| Auto-discovered doc pages | `content/docs/projects/{repo}/*.md` | -| Landing page card data | `data/projects.json` | -| Sync manifest | `.sync-manifest.json` | -| Content lockfile (with `--update-lock`) | `.content-lock.json` | - -### 3. Start Hugo - -```bash -npm run dev -``` - -Navigate to `http://localhost:1313/`. 
Project pages appear at `/docs/projects/`. - -### 4. Build for production - -```bash -# Local dev (fetches HEAD): -go run ./cmd/sync-content --org complytime --config sync-config.yaml --write - -# Production (fetches at approved SHAs): -go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write - -hugo --minify --gc -``` - -Output is in `public/`. The `--lock` flag ensures content matches the approved -SHAs in `.content-lock.json`. Omit it for local development to fetch latest HEAD. - -## CLI Reference - -| Flag | Default | Description | -|------|---------|-------------| -| `--org` | `complytime` | GitHub organization (reads `peribolos.yaml` from `{org}/.github` repo) | -| `--token` | `$GITHUB_TOKEN` | GitHub API token (or set the env var) | -| `--config` | _(none)_ | Path to `sync-config.yaml` for config-driven file syncs | -| `--write` | `false` | Apply changes to disk (without this flag, everything is a dry-run) | -| `--output` | `.` | Hugo site root directory | -| `--repo` | _(none)_ | Sync only this repo, e.g. 
`complytime/complyctl` | -| `--include` | _(all)_ | Comma-separated repo allowlist (empty = all eligible repos) | -| `--exclude` | _(see config)_ | Comma-separated repo names to skip; merged with `discovery.ignore_repos` in `sync-config.yaml` | -| `--workers` | `5` | Maximum concurrent repo processing goroutines | -| `--timeout` | `3m` | Overall timeout for all API operations | -| `--summary` | _(none)_ | Write a Markdown change summary to this file (for PR bodies) | -| `--lock` | _(none)_ | Path to `.content-lock.json` for content approval gating | -| `--update-lock` | `false` | Write current upstream SHAs to the lockfile (requires `--lock`) | - -## Common Tasks - -### Sync a single repository +The essentials: ```bash -go run ./cmd/sync-content --repo complytime/complyctl --config sync-config.yaml --write -``` - -### Generate a change summary for PR review - -```bash -go run ./cmd/sync-content --org complytime --config sync-config.yaml --write \ - --summary sync-report.md -``` - -The summary file contains a Markdown report with new/updated/removed repos and -stats. - -### Increase concurrency for faster syncs - -```bash -go run ./cmd/sync-content --org complytime --workers 10 --write +go run ./cmd/sync-content --org complytime --config sync-config.yaml # dry-run +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write # write mode +go run ./cmd/sync-content --repo complytime/complyctl --config sync-config.yaml --write # single repo +go test -race ./cmd/sync-content/... # run tests ``` ## Configuration @@ -411,141 +326,15 @@ params: ## CI/CD Integration -### Three-Workflow Model +The tool integrates with three GitHub Actions workflows. See +[CONTRIBUTING.md](../../CONTRIBUTING.md#cicd-and-deployment) for workflow details. 
-The tool integrates with three GitHub Actions workflows (Constitution XV v1.3.0): +**Structured outputs** — when running in GitHub Actions, the tool writes to +`$GITHUB_OUTPUT` (`has_changes`, `changed_count`, `error_count`) and +`$GITHUB_STEP_SUMMARY` (Markdown sync report). The `--summary` flag writes the +same report to a file for PR body generation. -**1. CI (`ci.yml`)** — PR validation (syncs content and builds the site to catch breakage): - -```yaml -- name: Sync content - run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -``` - -**2. Content Sync Check (`sync-content-check.yml`)** — weekly upstream detection: - -```yaml -- name: Check for upstream changes - run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --update-lock --summary sync-summary.md -``` - -Checks upstream SHAs and creates/updates a PR with lockfile changes when content has moved. Since peribolos provides the authoritative repo list, separate discovery is unnecessary. - -**3. Deploy (`deploy-gh-pages.yml`)** — production build: - -```yaml -- name: Sync content - run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - -- name: Build site - run: hugo --minify --gc -``` - -Upstream content changes require a reviewed PR before reaching production — no -unreviewed content is deployed. - -### Structured Outputs - -When running in GitHub Actions, the tool writes structured data to -`$GITHUB_OUTPUT` and `$GITHUB_STEP_SUMMARY`: - -**`GITHUB_OUTPUT`:** - -``` -has_changes=true -changed_count=3 -error_count=0 -``` - -**`GITHUB_STEP_SUMMARY`:** A Markdown table with new/updated/removed repos and -sync stats. - -**`--summary` flag:** Writes the same Markdown report to a file, useful for -automated PR body generation. 
- -### Exit Codes - -| Code | Meaning | -|------|---------| -| 0 | Success (all repos synced or dry-run complete) | -| 1 | One or more errors occurred (API failures, write errors) | - -## Testing - -Tests are split across 10 `*_test.go` files that mirror the source files. A -shared `helpers_test.go` provides common utilities. - -```bash -# Run all tests -go test ./cmd/sync-content/... - -# Run with race detector -go test -race ./cmd/sync-content/... - -# Run with verbose output -go test -v ./cmd/sync-content/... -``` - -### Test Coverage - -| Category | What's tested | -|----------|---------------| -| Config loading | Valid YAML, malformed YAML, missing file, default values, missing required fields | -| Frontmatter injection | Prepend to bare content, replace existing frontmatter, empty content | -| Badge stripping | Line-start badges removed, inline badges preserved, no-badge passthrough | -| Heading shifting | All headings bumped down one level (H1→H2, H2→H3, …) so Hugo page title is the sole H1 | -| Heading casing | ALL CAPS normalised to Title Case, acronyms preserved, mixed-case normalised, multi-word headings | -| Title from filename | ALL CAPS filenames (`CONTRIBUTING.md` → `Contributing`), hyphen/underscore splitting, acronym preservation | -| Link rewriting | Relative to absolute, images to raw URLs, absolute URLs unchanged, anchors unchanged, `./` prefix | -| Repo name validation | Valid names, empty, `.`, `..`, path separators | -| `processRepo` integration | Mock API server, project page written with correct frontmatter, headings shifted, README SHA recorded | -| Branch-unchanged fast path | No README fetch when branch SHA matches, manifest carry-forward | -| Branch-changed README-unchanged | Two-tier detection classifies as unchanged | -| `syncConfigSource` | All transforms applied, provenance comment inserted, dry-run writes nothing | -| Doc page scanning | Auto-syncs `docs/*.md`, skips config-tracked files, generates section indexes | -| Manifest 
round-trip | Write and read manifest, orphan cleanup, empty directory pruning | -| Concurrent access | Race-safe `syncResult` mutations, concurrent `recordFile` | -| Peribolos integration | Governance registry fetch, repo validation, missing org handling | - -All integration tests use `net/http/httptest` to mock the GitHub API. No real API -calls are made during testing. - -## File Inventory - -``` -cmd/sync-content/ -├── main.go # Entry point and orchestration (~440 lines) -├── config.go # Config types and loading -├── github.go # GitHub API client and types -├── transform.go # Markdown transforms (links, badges, frontmatter) -├── hugo.go # Hugo page and card generation -├── sync.go # Sync logic, result tracking, repo processing -├── manifest.go # Manifest I/O and state tracking -├── cleanup.go # Orphan and stale content removal -├── path.go # Path validation utilities -├── lock.go # Content lockfile read/write/query -├── *_test.go # Tests mirror source files (10 files) -└── README.md # This file - -sync-config.yaml # Declarative sync config (repo root) -.content-lock.json # Approved upstream SHAs per repo (committed) -go.mod # Go module: github.com/complytime/website -go.sum # Dependency checksums -``` - -### Generated Files (gitignored, not committed) - -``` -content/docs/projects/{repo}/_index.md # Section index (metadata only) -content/docs/projects/{repo}/overview.md # README content page -content/docs/projects/{repo}/*.md # Auto-discovered doc pages -data/projects.json # Landing page card data -.sync-manifest.json # Orphan tracking manifest -``` +**Exit codes**: `0` = success, `1` = one or more errors (API failures, write errors). 
## License diff --git a/cmd/sync-content/sync.go b/cmd/sync-content/sync.go index fa033ed..2d1ae49 100644 --- a/cmd/sync-content/sync.go +++ b/cmd/sync-content/sync.go @@ -419,6 +419,16 @@ func syncRepoDocPages(ctx context.Context, gh *apiClient, org string, repo Repo, continue } + // Hugo treats index.md as a leaf bundle, which conflicts + // with the _index.md branch bundle (section page) the sync + // tool generates for every project directory. Allowing both + // causes Hugo to hide the section and its children. + if strings.EqualFold(baseName, "index.md") { + logger.Info("skipping index.md (conflicts with Hugo _index.md section page)", + "src", filePath) + continue + } + relPath := strings.TrimPrefix(filePath, scanPath+"/") destRel := filepath.Join("content", "docs", "projects", repo.Name, relPath) destPath := filepath.Join(output, destRel) diff --git a/cmd/sync-content/sync_test.go b/cmd/sync-content/sync_test.go index 2a784c2..7c5835a 100644 --- a/cmd/sync-content/sync_test.go +++ b/cmd/sync-content/sync_test.go @@ -808,6 +808,77 @@ func TestProcessRepo_LockedSHA_MatchesUpstream(t *testing.T) { } } +func TestSyncRepoDocPages_SkipsIndexMD(t *testing.T) { + fetchedFiles := make(map[string]bool) + + mux := http.NewServeMux() + + mux.HandleFunc("/repos/testorg/test-repo/contents/docs", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "index.md", Path: "docs/index.md", Type: "file"}, + {Name: "usage.md", Path: "docs/usage.md", Type: "file"}, + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/contents/docs/index.md", func(w http.ResponseWriter, r *http.Request) { + fetchedFiles["docs/index.md"] = true + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64("# Index\n\nThis is a mkdocs index."), + Encoding: "base64", + SHA: "sha-index", + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/contents/docs/usage.md", func(w http.ResponseWriter, r *http.Request) { + fetchedFiles["docs/usage.md"] = true + _ = 
json.NewEncoder(w).Encode(FileResponse{ + Content: b64("# Usage\n\nRun it."), + Encoding: "base64", + SHA: "sha-usage", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + repo := Repo{ + Name: "test-repo", + FullName: "testorg/test-repo", + Description: "A test repository", + Language: "Go", + HTMLURL: "https://github.com/testorg/test-repo", + DefaultBranch: "main", + PushedAt: "2025-01-15T00:00:00Z", + } + + discovery := Discovery{ScanPaths: []string{"docs"}} + result := &syncResult{} + syncRepoDocPages(ctx, gh, "testorg", repo, output, true, discovery, nil, nil, result, "") + + if fetchedFiles["docs/index.md"] { + t.Error("index.md should not have been fetched (conflicts with Hugo _index.md)") + } + if !fetchedFiles["docs/usage.md"] { + t.Error("usage.md should have been fetched") + } + + indexPath := filepath.Join(output, "content", "docs", "projects", "test-repo", "index.md") + if _, err := os.Stat(indexPath); !os.IsNotExist(err) { + t.Error("index.md should not have been written (would create Hugo leaf bundle conflict)") + } + + usagePath := filepath.Join(output, "content", "docs", "projects", "test-repo", "usage.md") + if _, err := os.Stat(usagePath); err != nil { + t.Fatalf("usage.md should have been written: %v", err) + } + + if result.synced != 1 { + t.Errorf("synced = %d, want 1 (only usage.md)", result.synced) + } +} + func TestParseNameList_RepoFilterOverridesExclude(t *testing.T) { _ = parseNameList("") excludeSet := parseNameList("complyctl,complyscribe") diff --git a/package-lock.json b/package-lock.json index b4cacaf..a4081d2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4542,9 +4542,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": 
"sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", "license": "MIT", "engines": { "node": ">=8.6" @@ -5488,9 +5488,9 @@ } }, "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", "engines": { "node": ">=12" @@ -5698,9 +5698,9 @@ } }, "node_modules/vite/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", "engines": { @@ -5788,15 +5788,18 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz", - "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==", + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", "license": "ISC", "bin": { "yaml": "bin.mjs" }, "engines": { "node": ">= 14.6" + 
}, + "funding": { + "url": "https://github.com/sponsors/eemeli" } }, "node_modules/yargs": { From 563515631153956a08bbda166f4988cf6de51e6d Mon Sep 17 00:00:00 2001 From: Sonu Preetam Date: Thu, 26 Mar 2026 08:37:25 -0400 Subject: [PATCH 4/5] feat(docs): update spec with the changes Signed-off-by: Sonu Preetam --- cmd/sync-content/README.md | 1 + specs/006-go-sync-tool/plan.md | 76 ++++----------------------------- specs/006-go-sync-tool/spec.md | 22 +++++++--- specs/006-go-sync-tool/tasks.md | 24 ++++++++--- 4 files changed, 43 insertions(+), 80 deletions(-) diff --git a/cmd/sync-content/README.md b/cmd/sync-content/README.md index 0a39850..1daad03 100644 --- a/cmd/sync-content/README.md +++ b/cmd/sync-content/README.md @@ -322,6 +322,7 @@ params: | `titleCaseHeadings` | Applies acronym-aware Title Case to all in-page heading text (e.g. `## getting started` → `## Getting Started`, `## api reference` → `## API Reference`, `## CONTRIBUTING` → `## Contributing`); normalises ALL CAPS words while preserving known acronyms; ensures page headings and Hugo's TableOfContents match | | `stripBadges` | Removes `[![alt](img)](link)` badge patterns from the start of content | | `rewriteRelativeLinks` | Converts `[text](path)` to `[text](https://github.com/.../blob/main/path)` and `![alt](img)` to `![alt](https://raw.githubusercontent.com/.../img)` | +| `rewriteDiagramBlocks` | Converts fenced diagram code blocks (mermaid, plantuml, d2, graphviz/dot, ditaa, and 12 other Kroki-supported languages) to `` ```kroki {type=…} `` format for server-side rendering; `dot` normalised to `graphviz` | | `injectFrontmatter` | Prepends or replaces YAML frontmatter with declared key-value pairs | ## CI/CD Integration diff --git a/specs/006-go-sync-tool/plan.md b/specs/006-go-sync-tool/plan.md index 46c3feb..e8bd822 100644 --- a/specs/006-go-sync-tool/plan.md +++ b/specs/006-go-sync-tool/plan.md @@ -17,7 +17,7 @@ Replace the config-only `cmd/sync-content` tool with the production-quality gove 
**Project Type**: CLI tool (Go) embedded in a static website repo (Hugo) **Performance Goals**: Full org sync < 60s with token; Hugo build < 2s **Constraints**: All code in `package main` within `cmd/sync-content/` (Constitution XIV: Simplicity); third-party deps minimized — `gopkg.in/yaml.v3` is the sole dep (Constitution II) -**Scale/Scope**: 10 eligible repos in org, 10 Go source files, 10 test files +**Scale/Scope**: 10 repos in org (4 eligible after `ignore_repos` filtering), 10 Go source files, 10 test files ## Constitution Check (Pre-Design) @@ -27,71 +27,13 @@ Replace the config-only `cmd/sync-content` tool with the production-quality gove ## Project Structure -### Documentation (this feature) - -```text -specs/006-go-sync-tool/ -├── spec.md # Feature specification -├── plan.md # This file -├── research.md # Phase 0 output -└── tasks.md # Implementation tasks -``` - -### Source Code (repository root) - -```text -complytime-website/ -├── cmd/ -│ └── sync-content/ -│ ├── main.go # Entry point and orchestration (~440 lines) -│ ├── config.go # Config types and loading (incl. Peribolos types) -│ ├── github.go # GitHub API client and types (incl. 
peribolos fetch) -│ ├── transform.go # Markdown transforms -│ ├── hugo.go # Hugo page and card generation -│ ├── sync.go # Sync logic and repo processing -│ ├── manifest.go # Manifest I/O and state tracking -│ ├── cleanup.go # Orphan and stale content removal -│ ├── path.go # Path validation utilities -│ ├── lock.go # Content lockfile read/write/query -│ └── *_test.go # Tests mirror source files (10 files) -├── config/ -│ └── _default/ -│ ├── hugo.toml # Site config -│ ├── module.toml # Hugo module mounts (existing) -│ ├── params.toml # Doks theme params (existing) -│ └── menus/ -│ └── menus.en.toml # Navigation menus (Projects entry exists at weight 20) -├── content/ -│ ├── docs/ -│ │ ├── projects/ -│ │ │ ├── _index.md # Hand-maintained section index (committed, has cascade for sidebar collapsing) -│ │ │ └── {repo}/ # Generated per-repo content (gitignored) -│ │ │ ├── _index.md # Section index (frontmatter only, no body) -│ │ │ ├── overview.md # README content as child page -│ │ │ └── {doc}.md # Doc pages from discovery.scan_paths -│ │ └── getting-started/ # Hand-maintained (committed) -├── data/ -│ └── projects.json # Generated landing page cards (gitignored) -├── .sync-manifest.json # Tracks written files for orphan cleanup (gitignored) -├── layouts/ -│ ├── home.html # Landing page (reads data/projects.json dynamically) -│ ├── shortcodes/ -│ │ └── project-cards.html # Project cards shortcode (type-grouped, reads data/projects.json) -│ └── docs/ -│ └── list.html # Docs list with sidebar (already exists) -├── .github/ -│ └── workflows/ -│ ├── deploy-gh-pages.yml # Deploy pipeline (sync at locked SHAs, Hugo build, GitHub Pages) -│ ├── ci.yml # PR validation (test, sync with --lock, build) -│ └── sync-content-check.yml # Weekly content check (--update-lock, PR creation) -├── sync-config.yaml # Declarative file sync manifest (updated) -├── .content-lock.json # Approved upstream SHAs per repo (committed) -├── go.mod # Go module (initialized fresh for the port) -├── 
go.sum # Go checksums (generated by go mod tidy) -└── .gitignore # Updated with generated path exclusions -``` - -**Structure Decision**: Single-project layout. The sync tool is organized as multiple files within `package main` at `cmd/sync-content/` (10 source files). No separate packages, no `internal/`, no `pkg/`. Files are split by domain (config, GitHub API, transforms, Hugo pages, sync logic, manifest, cleanup, path utils, content lockfile, entry point). Tests mirror source files 1:1. This matches Constitution XIV (Simplicity) — no unnecessary abstractions while keeping each file focused. +Feature specs live in `specs/006-go-sync-tool/` (spec, plan, research, tasks). The full repository layout is documented in [CONTRIBUTING.md](../../CONTRIBUTING.md#project-structure). Key files for this feature: + +- **`cmd/sync-content/`** — 10 Go source files in `package main`, split by domain (config, GitHub API, transforms, Hugo pages, sync logic, manifest, cleanup, path utils, content lockfile, entry point). Tests mirror source files 1:1 (10 test files). No separate packages — Constitution XIV (Simplicity). +- **`sync-config.yaml`** — Declarative config overlay for per-repo file sync. +- **`.content-lock.json`** — Approved upstream SHAs per repo (committed). +- **`.github/workflows/`** — Three workflows: CI (`ci.yml`), deploy (`deploy-gh-pages.yml`), weekly content check (`sync-content-check.yml`). +- **Generated output** — See spec [Output Structure](spec.md#output-structure). ## Constitution Re-Check (Post Phase 1 Design) @@ -132,4 +74,4 @@ A code audit of `cmd/sync-content/` identified 18 findings across security, logi ## Complexity Tracking -No constitution violations. All design choices align with established principles. Hardening phase adds security and correctness fixes without introducing new dependencies or abstractions — consistent with Constitution XIV (Simplicity). 
Phase 13 (content transform improvements) adds heading casing normalisation, ALL CAPS normalisation, duplicate H1 removal, and a Hugo render heading hook — all within existing files, no new packages or dependencies. Phase 14 (diagram block rewriting) adds Kroki format conversion for upstream diagram code blocks, routing mermaid through server-side Kroki rather than client-side JS — consistent with Constitution V (No Runtime JavaScript Frameworks). +No constitution violations. All design choices align with established principles. Hardening phase adds security and correctness fixes without introducing new dependencies or abstractions — consistent with Constitution XIV (Simplicity). Phase 13 (content transform improvements) adds heading casing normalisation, ALL CAPS normalisation, duplicate H1 removal, and a Hugo render heading hook — all within existing files, no new packages or dependencies. Phase 14 (diagram block rewriting) adds Kroki format conversion for upstream diagram code blocks, routing mermaid through server-side Kroki rather than client-side JS — consistent with Constitution V (No Runtime JavaScript Frameworks). Phase 15 (bug fixes) adds a guard to skip upstream `index.md` files that conflict with Hugo's `_index.md` section convention — no new dependencies, single guard clause in `syncRepoDocPages`. 
diff --git a/specs/006-go-sync-tool/spec.md b/specs/006-go-sync-tool/spec.md index 797bf69..b61067f 100644 --- a/specs/006-go-sync-tool/spec.md +++ b/specs/006-go-sync-tool/spec.md @@ -1,6 +1,6 @@ # Feature Specification: Go Content Sync Tool -**Feature Branch**: `006-go-sync-tool` +**Feature ID**: `006-go-sync-tool` (active branch: `feat/diagram-rewrite-transform`) **Phase**: 2 (Content Infrastructure) ## Overview @@ -23,13 +23,13 @@ This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, 10 | IS-002 | README fetch with base64 decoding and SHA tracking | | IS-003 | Per-repo page generation: section index (`_index.md`, frontmatter only, with `formatRepoTitle` for `title` and raw repo name as `linkTitle` for sidebar; ALL CAPS repo/file names normalised to Title Case) + overview page (`overview.md`, README content) | | IS-004 | Landing page card generation (`data/projects.json`) with type derivation from topics | -| IS-005 | Config-driven file sync with transforms (`inject_frontmatter`, `rewrite_links`, `strip_badges`, `rewrite_diagrams`); all synced content (org-discovered and config-driven) unconditionally receives `stripLeadingH1`, `shiftHeadings`, `titleCaseHeadings`, `stripBadges`, `rewriteDiagramBlocks`, and `rewriteRelativeLinks` | +| IS-005 | Config-driven file sync with transforms (`inject_frontmatter`, `rewrite_links`, `strip_badges`, `rewrite_diagrams`). All synced content unconditionally receives `stripLeadingH1`, `shiftHeadings`, and `titleCaseHeadings`. Org-discovered content (README overviews and doc pages) additionally receives `stripBadges`, `rewriteDiagramBlocks`, and `rewriteRelativeLinks` unconditionally. Config sources apply `stripBadges`, `rewriteRelativeLinks`, and `rewriteDiagramBlocks` only when their respective transform flags are set. 
| | IS-006 | Concurrent processing with bounded worker pool (`--workers`) | | IS-007 | Dry-run by default; `--write` flag required for disk I/O | -| IS-008 | Markdown transforms: `stripLeadingH1` (removes leading H1 — title already in frontmatter), `shiftHeadings` (H1→H2, H2→H3, …), `titleCaseHeadings` (acronym-aware Title Case for in-page headings and TOC; normalises ALL CAPS words to Title Case while preserving known acronyms from the `knownAcronyms` map in `hugo.go` — ~30 domain terms; maintainers add entries as new projects introduce terminology), `stripBadges`, `rewriteRelativeLinks`, `rewriteDiagramBlocks` (converts fenced diagram code blocks — mermaid, plantuml, d2, graphviz/dot, ditaa, and other Kroki-supported languages — to `kroki {type=…}` format for server-side rendering via Doks' `render-codeblock-kroki.html` hook; `dot` normalised to `graphviz`; routes mermaid through Kroki rather than Doks' client-side `render-codeblock-mermaid.html` to uphold Constitution V) | +| IS-008 | Markdown transforms — **unconditional** (all content): `stripLeadingH1` (removes leading H1 — title already in frontmatter), `shiftHeadings` (H1→H2, H2→H3, …), `titleCaseHeadings` (acronym-aware Title Case for in-page headings and TOC; normalises ALL CAPS words to Title Case while preserving known acronyms from the `knownAcronyms` map in `hugo.go` — ~30 domain terms; maintainers add entries as new projects introduce terminology). 
**Unconditional for org-discovered, config-gated for config sources**: `stripBadges`, `rewriteRelativeLinks`, `rewriteDiagramBlocks` (converts fenced diagram code blocks — mermaid, plantuml, d2, graphviz/dot, ditaa, and other Kroki-supported languages — to `kroki {type=…}` format for server-side rendering via Doks' `render-codeblock-kroki.html` hook; `dot` normalised to `graphviz`; routes mermaid through Kroki rather than Doks' client-side `render-codeblock-mermaid.html` to uphold Constitution V) | | IS-009 | Repo filtering: `--include`/`--exclude` lists (peribolos is the governance gate; no API metadata filtering) | | IS-012 | Sync manifest (`.sync-manifest.json`) for orphan file tracking | -| IS-014 | Doc page auto-sync from `discovery.scan_paths` directories | +| IS-014 | Doc page auto-sync from `discovery.scan_paths` directories; upstream `index.md` files are skipped to prevent Hugo leaf/branch bundle conflicts with generated `_index.md` section pages | | IS-016 | Single-repo mode (`--repo`): sync only one repository (validated against peribolos) | | IS-017 | Summary file generation (`--summary report.md`) | | IS-018 | GitHub CI outputs: `GITHUB_OUTPUT` variables and `GITHUB_STEP_SUMMARY` | @@ -55,7 +55,14 @@ This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, 10 | Repo in peribolos but deleted on GitHub | API metadata fetch returns 404; log warning, skip repo, continue | | `.github` repo missing or peribolos.yaml absent | Fatal error — log and exit non-zero | | `--org` flag value doesn't match peribolos `orgs` key | Fatal error — log mismatch and exit non-zero | -| `--repo` flag used (single-repo mode) | Validated against peribolos — repo must exist in governance registry; metadata fetched from API | +| `--repo` flag used (single-repo mode) | Validated against peribolos — repo must exist in governance registry; metadata fetched from API. 
No dedicated verification task; covered by `TestParseNameList_RepoFilterOverridesExclude` in `sync_test.go` | + +### Edge Cases (Doc Page Sync) + +| Case | Expected Behavior | +|------|-------------------| +| Upstream repo has `docs/index.md` (e.g. mkdocs landing page) | Skipped with info log — Hugo treats `index.md` as a leaf bundle, which conflicts with the `_index.md` branch bundle the sync tool generates. Content is not lost; the README is already synced as `overview.md`. | +| Upstream repo has `docs/subdir/index.md` | Skipped — the same leaf bundle conflict applies in every intermediate directory for which the sync tool generates an `_index.md` section page | ## User Stories @@ -204,7 +211,7 @@ All criteria must pass before feature 006 merges to `main`. | SC-003 | Dry-run produces zero files; write mode produces correct output structure | T003, T004 | | SC-004 | Hugo builds with zero errors after sync | T005 | | SC-005 | Auto-discovered repos have section index + overview + card | T006, T007 | -| SC-006 | Config overlay applies transforms at declared dest paths | T008, T009 (deferred until sources declared; code paths covered by unit tests) | +| SC-006 | Config overlay applies transforms at declared dest paths | T008, T009 cancelled (`sources: []`); code paths covered by unit tests (`TestSyncConfigSource`, `TestInjectFrontmatter`, `TestRewriteDiagramBlocks`) | | SC-007 | Change detection skips unchanged repos; stale cleanup removes all files | T010 | | SC-008 | Unit and integration tests pass | T015, T016 | | SC-009 | `go vet` and `gofmt` pass with zero issues | T019 | @@ -216,10 +223,11 @@ All criteria must pass before feature 006 merges to `main`. 
| SC-015 | `--update-lock` writes current upstream SHAs to lockfile | `lock_test.go` (`TestWriteLock`, `TestWriteLock_DeterministicOrder`) | | SC-016 | Weekly check workflow creates/updates a PR with lockfile changes | `sync-content-check.yml` manual dispatch | | SC-017 | Diagram code blocks in upstream content are rewritten to Kroki format and render server-side (not via client-side JS) | `TestRewriteDiagramBlocks` (12 subtests), `sync.go` pipeline integration (3 call sites) | +| SC-018 | Upstream `index.md` files are skipped during doc page sync to prevent Hugo leaf/branch bundle conflicts | `TestSyncRepoDocPages_SkipsIndexMD` | ## Merge Readiness Gate -All 17 success criteria (SC-001 through SC-017) MUST pass before merging feature 006 to `main`. SC-006 is deferred (blocked on config sources being declared) but its code paths are covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`). SC-016 requires a manual `workflow_dispatch` run of `sync-content-check.yml` after merge. +All 18 success criteria (SC-001 through SC-018) MUST pass before merging feature 006 to `main`. The SC-006 verification tasks (T008, T009) are cancelled because no config sources are declared; its code paths are covered by unit tests. SC-016 requires a manual `workflow_dispatch` run of `sync-content-check.yml` after merge. ## Appendix: Legacy ID Cross-Reference diff --git a/specs/006-go-sync-tool/tasks.md b/specs/006-go-sync-tool/tasks.md index b34823d..8276c38 100644 --- a/specs/006-go-sync-tool/tasks.md +++ b/specs/006-go-sync-tool/tasks.md @@ -55,14 +55,14 @@ ## Phase 4: User Story 3 — Config-Driven Precision Sync (Priority: P1) -**Goal**: Config overlay provides precise control over file sync destinations, frontmatter, and transforms for key projects (complyctl, complyscribe, collector-components) without breaking the org scan baseline. +**Goal**: Config overlay code is implemented and unit-tested. Integration verification is deferred to the feature that declares config sources. 
-**Independent Test**: Verify config-declared files appear at their `dest` paths in `sync-config.yaml` with injected frontmatter and applied transforms. +**Independent Test**: Unit tests cover `syncConfigSource`, `injectFrontmatter`, `stripBadges`, `rewriteRelativeLinks`, `rewriteDiagramBlocks`. Integration verification will run when `sync-config.yaml` declares sources. -- [ ] T008 [P] [US3] [DEFERRED] Verify `skip_org_sync` behavior: for repos with `skip_org_sync: true` in `sync-config.yaml`, confirm no auto-generated section index (`_index.md`) or overview page (`overview.md`) exists at `content/docs/projects/{repo}/` BUT the repo's `ProjectCard` is present in `data/projects.json`. *Blocked — `sync-config.yaml` currently has `sources: []`. Unblocked when sources are declared for specific repos. Code paths covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`).* -- [ ] T009 [P] [US3] [DEFERRED] Verify config file transforms: check config-declared files at their `dest` paths have correct `inject_frontmatter` (YAML frontmatter with title, description, weight), `rewrite_links` (relative links converted to absolute GitHub URLs), `strip_badges` (CI badge lines removed), and `rewrite_diagrams` (diagram code blocks converted to Kroki format) per `sync-config.yaml` transform declarations. *Blocked — `sync-config.yaml` currently has `sources: []`. Unblocked when sources are declared. Code paths covered by unit tests (`TestInjectFrontmatter`, `TestStripBadges`, `TestRewriteRelativeLinks`, `TestRewriteDiagramBlocks`).* +- [ ] ~~T008~~ [P] [US3] [CANCELLED] Verify `skip_org_sync` behavior. *Cancelled — `sync-config.yaml` has `sources: []` with no timeline for config sources. Code paths fully covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`). Verification will be part of the feature that adds config sources.* +- [ ] ~~T009~~ [P] [US3] [CANCELLED] Verify config file transforms. *Cancelled — same rationale as T008. 
Code paths fully covered by unit tests (`TestInjectFrontmatter`, `TestStripBadges`, `TestRewriteRelativeLinks`, `TestRewriteDiagramBlocks`). Verification will be part of the feature that adds config sources.* -**Checkpoint**: Hybrid mode works — org scan provides the baseline, config overlay provides precision. +**Checkpoint**: Config overlay code paths are unit-tested. Integration verification deferred to the feature that declares config sources. --- @@ -236,6 +236,18 @@ --- +## Phase 15: Bug Fixes + +**Purpose**: Address production issues discovered during live site validation. + +**Ref**: IS-014 (updated), SC-018 + +- [x] T066 [US1] Skip `index.md` files in `syncRepoDocPages` in `cmd/sync-content/sync.go`: add a guard that skips files named `index.md` (case-insensitive) during doc page auto-sync. Hugo treats `index.md` as a leaf bundle, which conflicts with the `_index.md` branch bundle (section page) the sync tool generates. The conflict caused complyscribe's section to render as a flat "Index" page in the sidebar, hiding all child pages. Tested by `TestSyncRepoDocPages_SkipsIndexMD` in `sync_test.go`. Validates SC-018. *(Done — guard added with info-level log. Test verifies index.md is neither fetched nor written, while sibling files sync normally.)* + +**Checkpoint**: Upstream `index.md` files (e.g. mkdocs landing pages) no longer create Hugo leaf bundle conflicts. Project sections render correctly in the sidebar with all child pages visible. + +--- + ## Appendix: Implicit Coverage Note > Tasks T003 (dry-run) and T004 (write mode) implicitly exercise the `--timeout`, `--workers` flags, the `maxRetries` constant, and byte-level dedup at their default values. Dedicated isolated tests for these parameters are covered by US6 unit tests (T015, T016) and the race detector run (T017). Hardening phase (Phase 8) covers adversarial and defensive scenarios not reached by happy-path verification. 
@@ -244,6 +256,6 @@ > > IS-016 (single-repo mode via `--repo`) has no dedicated verification task. The flag is functional and exercised by unit tests in `sync_test.go` (`TestParseNameList_RepoFilterOverridesExclude`). It was ported from the reference implementation and is a convenience shortcut for `--include` with a single repo — no separate integration-level task was needed. > -> IS-017 (summary file generation via `--summary`) verification is included in T024 rather than a standalone task. The `toMarkdown()` method that generates summary content has no dedicated unit test — its output is exercised by integration-level CI runs. +> IS-017 (summary file generation via `--summary`) verification is included in T024 rather than a standalone task. The `toMarkdown()` method that generates summary content has no dedicated unit test — its output is exercised by integration-level CI runs. A targeted unit test for `toMarkdown()` (covering added/updated/removed/empty states) would improve coverage but is low priority given the method's simplicity. 
From 3a72c43bbcd31f33113ec859a2df5e6a8cd7f8ec Mon Sep 17 00:00:00 2001 From: Sonu Preetam Date: Thu, 26 Mar 2026 10:50:28 -0400 Subject: [PATCH 5/5] feat: update with Dockerfile hadolint findings Signed-off-by: Sonu Preetam --- .devcontainer/Dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 215a180..1811507 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,12 +1,14 @@ FROM mcr.microsoft.com/devcontainers/base:ubuntu-22.04 +# hadolint ignore=DL3008 RUN apt-get update && \ - apt-get install -y \ + apt-get install -y --no-install-recommends \ ca-certificates \ nodejs \ npm \ wget && \ - update-ca-certificates + update-ca-certificates && \ + rm -rf /var/lib/apt/lists/* ARG HUGO_VERSION="0.155.1" RUN wget --quiet "https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_Linux-64bit.tar.gz" && \