diff --git a/.content-lock.json b/.content-lock.json new file mode 100644 index 0000000..03be823 --- /dev/null +++ b/.content-lock.json @@ -0,0 +1,3 @@ +{ + "repos": {} +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6616112 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +--- +name: CI + +on: + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd + + - name: Setup Go + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + + - name: Setup Node.js + uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 + with: + node-version: '22' + + - name: Setup Hugo + uses: peaceiris/actions-hugo@75d2e84710de30f6ff7268e08f310b60ef14033f + with: + hugo-version: '0.155.1' + extended: true + + - name: Install Node dependencies + run: npm ci + + - name: Run tests + run: go test -race ./cmd/sync-content/...
+ + - name: Sync content + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Build site + run: hugo --minify --gc diff --git a/.github/workflows/deploy-gh-pages.yml b/.github/workflows/deploy-gh-pages.yml index 7cafca9..5bf4112 100644 --- a/.github/workflows/deploy-gh-pages.yml +++ b/.github/workflows/deploy-gh-pages.yml @@ -5,8 +5,6 @@ on: push: branches: main - workflow_dispatch: - permissions: contents: read pages: write @@ -23,6 +21,11 @@ jobs: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd + - name: Setup Go + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + - name: Setup Node.js + uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 with: @@ -37,8 +40,28 @@ jobs: - name: Install dependencies run: npm ci + - name: Go vet + run: go vet ./... + + - name: Check formatting + run: | + unformatted=$(gofmt -l ./cmd/sync-content/) + if [ -n "$unformatted" ]; then + echo "::error::Unformatted Go files:" + echo "$unformatted" + exit 1 + fi + + - name: Run tests + run: go test -race ./cmd/sync-content/...
+ + - name: Sync content + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Build - run: hugo --minify --gc --baseURL "https://complytime.dev/" + run: hugo --minify --gc - name: Upload artifact uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b diff --git a/.github/workflows/sync-content-check.yml b/.github/workflows/sync-content-check.yml new file mode 100644 index 0000000..13ba1eb --- /dev/null +++ b/.github/workflows/sync-content-check.yml @@ -0,0 +1,40 @@ +--- +name: Content Sync Check + +on: + schedule: + - cron: '0 6 * * 1' + +permissions: + contents: read + +jobs: + content-check: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd + + - name: Setup Go + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 + with: + go-version-file: go.mod + + - name: Check for upstream changes + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --update-lock --summary sync-summary.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Create or update PR + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 + with: + add-paths: .content-lock.json + branch: automated/content-sync-update + commit-message: "content: update upstream documentation lockfile" + title: "content: update upstream documentation" + body-path: sync-summary.md + labels: automated, documentation + delete-branch: true diff --git a/.gitignore b/.gitignore index 290221d..a699212 100644 --- a/.gitignore +++ b/.gitignore @@ -10,12 +10,19 @@ node_modules/ # ─── Go ────────────────────────────────────────────────────────────── # Compiled sync engine binary (built by CI or locally). 
-sync-content +/sync-content # ─── Synced content (generated by sync-content at build time) ──────── -# Project subdirectories (complyctl, complyscribe, complytime-collector-components) -# are temporarily committed; they will be removed once sync-content runs in CI. -# The _index.md landing page is hand-authored and tracked. +# Per-repo project pages generated by the org scan and config overlay. +# The section-level _index.md at content/docs/projects/_index.md is +# hand-maintained and tracked — only repo subdirectories are ignored. +content/docs/projects/*/ + +# Landing page card data generated by the sync tool. +data/projects.json + +# Sync manifest (tracks files written by sync-content for orphan cleanup). +.sync-manifest.json # ─── OS files ──────────────────────────────────────────────────────── .DS_Store diff --git a/.mega-linter.yml b/.mega-linter.yml new file mode 100644 index 0000000..b404cae --- /dev/null +++ b/.mega-linter.yml @@ -0,0 +1,11 @@ +# https://megalinter.io/latest/supported-linters/ +ENABLE_LINTERS: + - ACTION_ACTIONLINT + - DOCKERFILE_HADOLINT + - GO_GOLANGCI_LINT + - MARKDOWN_MARKDOWNLINT + - REPOSITORY_GITLEAKS + - REPOSITORY_KICS + - YAML_YAMLLINT +# fd54f200: "Missing User Instruction" — false positive for devcontainer Dockerfile +REPOSITORY_KICS_ARGUMENTS: "--fail-on high --exclude-queries fd54f200-402c-4333-a5a4-36ef6709af2f" diff --git a/.specify/constitution.md b/.specify/constitution.md new file mode 100644 index 0000000..4bc703f --- /dev/null +++ b/.specify/constitution.md @@ -0,0 +1,95 @@ +# ComplyTime Website Constitution + +## Core Principles + +### I. Hugo + Doks + +The site framework is [Hugo](https://gohugo.io/) (extended) with the [Thulite Doks](https://getdoks.org/) theme. No alternative static site generators, themes, or frontend frameworks are permitted. All theme customization is done through SCSS overrides and Hugo layout overrides — not by forking or vendoring the theme. + +### II. 
Go Tooling + +All custom tooling (content sync, CLI utilities, build helpers) MUST be written in Go. The Go module in `go.mod` is shared with Hugo Modules. Third-party Go dependencies MUST be minimized; new dependencies require documented justification. + +### III. Single Source of Truth + +Every piece of project content on the site MUST trace back to a canonical source — a repo README, a `docs/` directory, or the GitHub API. The org's governance registry (`peribolos.yaml` in the `.github` repo) is the authoritative source for which repositories exist. Automated tooling is the sole mechanism for pulling upstream content. Manual content duplication is prohibited. If the source changes, the site updates on the next sync. + +### IV. Governance-Driven Discovery with Config Overlay + +The sync tool derives the set of eligible repositories from the org's governance registry rather than ad-hoc API discovery. Per-repo metadata (stars, language, topics) is fetched from the GitHub API. For repos requiring precise control (frontmatter, transforms, specific files), a declarative config overlay adds file-level syncs on top. The governance registry is the baseline; config is the precision layer. + +### V. No Runtime JavaScript Frameworks + +The site is statically generated. Client-side interactivity is limited to what Doks provides (FlexSearch, dark mode toggle, navigation). Custom JavaScript MUST be minimal and progressive — the site MUST function fully without JavaScript except for search. + +### VI. Match the ComplyTime Brand + +The site's visual design uses the established color palette, typography, and dark-theme-first aesthetic defined in the SCSS variables. Visual changes MUST maintain brand consistency and MUST NOT introduce new design systems or CSS frameworks beyond what Doks provides. + +### VII. Responsive and Accessible + +All pages MUST meet WCAG 2.1 AA. The site MUST be fully usable on mobile, tablet, and desktop viewports. 
Color contrast, keyboard navigation, alt text, and ARIA labels are mandatory for all new content and layouts. + +### VIII. Performance + +Hugo builds MUST complete in under two seconds for the current content volume. Pages MUST achieve a Lighthouse performance score of 90+. PurgeCSS is configured via PostCSS to eliminate unused styles from production builds. + +## Development Standards + +### IX. SPDX License Headers + +Every Go source file MUST include `// SPDX-License-Identifier: Apache-2.0` as the first comment line. + +### X. Go Code Quality + +All Go code MUST pass `go vet`, `gofmt`, and any linter checks configured in the CI workflow (see `.github/workflows/ci.yml`) before merge. Errors MUST always be checked and returned — never silently discarded. + +### XI. Structured Logging + +The Go sync tool MUST use `log/slog` for all logging. All log entries MUST include relevant structured fields (`repo`, `path`, `sha`, `error`). No `fmt.Println` or `log.Printf` for operational output. + +### XII. Dry-Run by Default + +The sync tool MUST default to dry-run mode. The `--write` flag is required for any disk I/O. This protects contributors from accidentally overwriting their local working tree. + +### XIII. Generated Content Is Not Committed + +All sync tool output (project pages, card data) is derived from the GitHub API and MUST be gitignored. The repository tracks only source files: Go code, config, templates, hand-authored content, and styling. CI generates all derived content from scratch on every build. Control files that gate what is generated (e.g. content lockfiles) ARE committed because they represent reviewed approval state, not derived content. + +### XIV. Simplicity + +Start simple, apply "You Aren't Gonna Need It" (YAGNI) principle. No abstractions without proven need. Tooling favors flat, domain-organised source files over deep package hierarchies. Complexity MUST be justified against a simpler alternative. + +## Operations + +### XV. 
GitHub Actions CI/CD + +Build, sync, and deployment are fully automated via GitHub Actions. No manual deployment steps. The workflow model includes: + +1. **CI** — validates PRs with dry-run sync, Go checks, and Hugo build. +2. **Content Sync Check** — runs periodically to detect upstream changes and open a PR for human review. +3. **Deploy** — on push to the default branch, syncs content at approved SHAs, builds Hugo, and publishes to GitHub Pages. + +Upstream content changes MUST be reviewed via a content sync PR before reaching production. No unreviewed content is deployed. + +### XVI. GitHub Pages Hosting + +The site is hosted on GitHub Pages. No other hosting platforms are permitted without an amendment to this constitution. + +## Licensing + +### XVII. Apache 2.0 + +All website code, tooling, and original content is licensed under Apache License 2.0. Synced content retains its upstream license. + +## Governance + +This constitution supersedes all other practices for the complytime-website repository. Amendments require: + +1. A documented proposal explaining the change and its rationale. +2. Update to this file with version increment per semantic versioning (MAJOR for principle removal/redefinition, MINOR for additions, PATCH for clarifications). +3. Propagation check across any dependent specs, plans, or task files. + +All PRs and reviews MUST verify compliance with these principles. + +**Version**: 1.5.0 | **Ratified**: 2026-03-11 | **Last Amended**: 2026-03-16 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4d8fcb8..a6f7694 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,6 +43,9 @@ oriented quickly. 
| Edit the homepage | `layouts/home.html` + `content/_index.md` | | Site-wide Hugo settings | `config/_default/hugo.toml` | | Theme/feature parameters | `config/_default/params.toml` | +| Sync project content | `go run ./cmd/sync-content --org complytime --config sync-config.yaml --write` | +| Configure file sync | `sync-config.yaml` | +| Run sync tool tests | `go test -race ./cmd/sync-content/...` | --- @@ -50,13 +53,14 @@ oriented quickly. | Tool | Version | Notes | |------------|-----------|-------------------------------------------| -| **Node.js** | ≥ 20.11.0 | Required by Hugo/Doks pipeline | +| **Node.js** | ≥ 22 | Required by Hugo/Doks pipeline | | **npm** | (bundled) | Comes with Node.js | -| **Hugo** | ≥ 0.148.1 | Extended edition; installed via npm scripts | +| **Hugo** | ≥ 0.155.1 | Extended edition; installed via npm scripts | +| **Go** | ≥ 1.25 | Required for the content sync tool | | **Git** | Any recent | For cloning and version control | > **Tip:** If you only want to edit Markdown content, Node.js + npm is all you -> need. +> need. Go is only required if you need to run the sync tool or its tests. --- @@ -70,13 +74,20 @@ cd website # 2. Install dependencies npm install -# 3. Start the dev server (live reload) +# 3. Sync project content from GitHub (generates project pages and cards) +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write + +# 4. Start the dev server (live reload) npm run dev ``` The site will be available at **http://localhost:1313/**. Hugo's dev server watches for file changes and rebuilds automatically. +> **Note:** Step 3 fetches README content and metadata from the `complytime` +> GitHub org. Set the `GITHUB_TOKEN` environment variable for higher API rate +> limits. Without it, unauthenticated requests are limited to 60/hour. 
+ ### Other Useful Commands ```bash @@ -88,6 +99,12 @@ npm run preview # Format files with Prettier npm run format + +# Sync content in dry-run mode (preview without writing) +go run ./cmd/sync-content --org complytime --config sync-config.yaml + +# Run Go tests +go test -race ./cmd/sync-content/... ``` --- @@ -102,6 +119,19 @@ website/ │ ├── _variables-custom.scss # Theme colors, fonts, card styles │ └── _custom.scss # Additional custom CSS │ +├── cmd/sync-content/ # Go content sync tool (package main, 10 source files) +│ ├── main.go # Entry point and orchestration (~440 lines) +│ ├── config.go # Config types and loading (incl. Peribolos types) +│ ├── github.go # GitHub API client and types (incl. peribolos fetch) +│ ├── transform.go # Markdown transforms (links, badges, headings) +│ ├── hugo.go # Hugo page and card generation +│ ├── sync.go # Sync logic, result tracking, repo processing +│ ├── manifest.go # Manifest I/O and state tracking +│ ├── cleanup.go # Orphan and stale content removal +│ ├── path.go # Path validation utilities +│ ├── lock.go # Content lockfile read/write/query +│ └── *_test.go # Tests mirror source files (10 files, ~2300 lines) +│ ├── config/_default/ # Hugo configuration (TOML) │ ├── hugo.toml # Core Hugo settings │ ├── params.toml # Doks theme parameters @@ -114,7 +144,12 @@ website/ │ ├── privacy.md # Privacy policy │ └── docs/ │ ├── getting-started/ # Getting started guide -│ └── projects/ # Project pages (complyctl, etc.) 
+│ └── projects/ # Project pages (generated by sync tool) +│ ├── _index.md # Hand-maintained section index (committed) +│ └── {repo}/ # Generated per-repo content (gitignored) +│ +├── data/ +│ └── projects.json # Generated landing page cards (gitignored) │ ├── layouts/ # Custom Hugo layout overrides │ ├── home.html # Homepage template (hero + features) @@ -122,6 +157,9 @@ website/ │ └── _default/_markup/ │ └── render-image.html # Custom image render hook │ +├── sync-config.yaml # Declarative file sync manifest +├── .content-lock.json # Approved upstream SHAs per repo (committed) +├── go.mod # Go module (sync tool) ├── static/ # Static assets (copied as-is to output) ├── images/ # Project logos and illustrations └── package.json # Node.js dependencies and scripts @@ -172,18 +210,18 @@ Your Markdown content goes here... ### Add a New Project Page -1. Create a new file: `content/docs/projects/my-project.md` -2. Add frontmatter (see template above) -3. The page appears automatically in the Projects section sidebar +Project pages under `content/docs/projects/` are **automatically generated** by +the sync tool from GitHub org repositories. You do not need to create them +manually. To add a new project: -For a project with sub-pages, create a folder instead: +1. Create the repository in the `complytime` GitHub org +2. Run the sync tool: `go run ./cmd/sync-content --org complytime --config sync-config.yaml --write` +3. The repo will automatically get a section index, overview page, and landing + page card -``` -content/docs/projects/my-project/ -├── _index.md # Project overview (required) -├── installation.md # Sub-page -└── quick-start.md # Sub-page -``` +For repos needing custom file sync with transforms (e.g., specific doc pages +with injected frontmatter), add a source entry in `sync-config.yaml`. See +`specs/006-go-sync-tool/quickstart.md` for details. ### Change Navigation Menus @@ -297,13 +335,31 @@ the local `layouts/home.html` is used instead. 
## CI/CD and Deployment +Three GitHub Actions workflows automate the pipeline: + +### PR Validation (`ci.yml`) + +- **Trigger:** Pull requests targeting `main` +- **What it does:** Runs `go test -race`, syncs content with `--lock` and + `--write` (at approved SHAs), and Hugo build to validate changes before merge + +### Content Sync Check (`sync-content-check.yml`) + +- **Trigger:** Weekly (Monday 06:00 UTC) +- **What it does:** Runs `--update-lock` to detect upstream SHA changes. + Opens/updates a PR with `.content-lock.json` changes for review + ### GitHub Pages Deployment (`deploy-gh-pages.yml`) - **Trigger:** Push to `main` -- **What it does:** Installs Node.js + Hugo, runs `hugo --minify --gc`, uploads - to GitHub Pages +- **What it does:** Sets up Go + Node.js + Hugo, runs the sync tool with + `--lock .content-lock.json --write` to generate content at approved SHAs, + then builds with `hugo --minify --gc` and uploads to GitHub Pages - **Pinned actions:** All GitHub Actions use SHA-pinned versions for security +> **Note:** Upstream content changes require a reviewed PR (opened by the weekly +> check workflow) before reaching production. No unreviewed content is deployed. 
+ --- ## Coding Conventions @@ -357,6 +413,8 @@ style: fix indentation in home template - [ ] Pages render correctly on the dev server - [ ] No broken links or missing images - [ ] Frontmatter includes all required fields (`title`, `description`, `weight`) +- [ ] If Go code was changed: `go vet ./...` and `gofmt -l ./cmd/sync-content/` are clean +- [ ] If Go code was changed: `go test -race ./cmd/sync-content/...` passes - [ ] Commit messages follow conventional format - [ ] DCO sign-off is present diff --git a/README.md b/README.md index 7fc8380..696725a 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ Built with [Hugo](https://gohugo.io/) and the [Doks](https://getdoks.org/) theme ### Prerequisites -- [Node.js](https://nodejs.org/) v20.11 or later +- [Node.js](https://nodejs.org/) v22 or later - [npm](https://www.npmjs.com/) (included with Node.js) +- [Go](https://go.dev/) 1.25+ (for the content sync tool) +- (Recommended) `GITHUB_TOKEN` env var for higher API rate limits ### Development @@ -17,6 +19,9 @@ Built with [Hugo](https://gohugo.io/) and the [Doks](https://getdoks.org/) theme # Install dependencies npm install +# Sync project content from GitHub org (generates project pages and cards) +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write + # Start development server npm run dev ``` @@ -43,6 +48,7 @@ website/ │ └── common/ │ ├── _custom.scss │ └── _variables-custom.scss +├── cmd/sync-content/ # Go content sync tool (10 source files, package main) ├── config/ # Hugo configuration │ ├── _default/ │ │ ├── hugo.toml @@ -56,16 +62,21 @@ website/ │ ├── _index.md # Homepage │ ├── docs/ # Documentation │ │ ├── getting-started/ -│ │ └── projects/ # Project pages (complyctl, complyscribe, etc.) 
+│ │ └── projects/ # Project pages (generated by sync tool, gitignored) │ └── privacy.md ├── layouts/ # Custom layouts -│ ├── home.html # Homepage layout +│ ├── home.html # Homepage layout (reads data/projects.json) │ └── docs/ │ └── list.html # Docs section listing layout ├── static/ # Static assets (favicons, icons) +├── sync-config.yaml # Declarative sync configuration +├── .content-lock.json # Approved upstream SHAs per repo (committed) +├── go.mod / go.sum # Go module and checksums ├── .github/ │ └── workflows/ -│ └── deploy-gh-pages.yml # CI/CD deployment +│ ├── deploy-gh-pages.yml # Deploy (sync at locked SHAs + Hugo build) +│ ├── ci.yml # PR validation (lint, test, dry-run, build) +│ └── sync-content-check.yml # Weekly content check (opens PR) └── package.json ``` @@ -115,7 +126,13 @@ Site configuration is in `config/_default/`: ## 🚢 Deployment -The site is deployed to GitHub Pages via the `.github/workflows/deploy-gh-pages.yml` workflow. On push to the configured branch, GitHub Actions builds the site with Hugo and deploys the `public/` directory. +The site uses three GitHub Actions workflows: + +- **`ci.yml`** — validates PRs with `go test -race`, content sync (with `--lock --write`), and Hugo build +- **`sync-content-check.yml`** — runs weekly to detect upstream doc changes and opens a PR to update `.content-lock.json` +- **`deploy-gh-pages.yml`** — on push to `main`, syncs content at approved SHAs, builds with Hugo, and deploys `public/` to GitHub Pages + +Upstream content changes require a reviewed PR before reaching production. ## 🤝 Contributing diff --git a/cmd/sync-content/README.md b/cmd/sync-content/README.md new file mode 100644 index 0000000..0e1c8d9 --- /dev/null +++ b/cmd/sync-content/README.md @@ -0,0 +1,552 @@ +# sync-content + +A Go CLI tool that pulls documentation from upstream GitHub repositories into the +ComplyTime website's Hugo content tree. 
It reads the org's governance registry +(`peribolos.yaml` in the `.github` repo) to determine which repositories exist, +enriches each with GitHub API metadata, generates per-project documentation pages +and landing-page card data, then layers precise config-driven file syncs on top. + +**No generated content is committed to git.** The tool runs at build time (in CI) +or on-demand (locally) to populate the site. This keeps the repository lean and +ensures documentation is always sourced from upstream. + +## How It Works + +The tool operates in **hybrid mode** with two complementary phases: + +### Phase 1: Governance-Driven Discovery (automatic) + +Fetches `peribolos.yaml` from the org's `.github` repo to get the authoritative +list of repositories, then enriches each with metadata from the GitHub REST API. +For each eligible repo it: + +1. Fetches the README and branch HEAD SHA. +2. Generates two Hugo pages per project: + - `content/docs/projects/{repo}/_index.md` — a section index with metadata + frontmatter (title, description, dates, language, stars, SEO metadata, + source/README SHAs). Contains no body content; the Doks sidebar renders + this as a collapsible section heading. + - `content/docs/projects/{repo}/overview.md` — the README content as a + navigable child page with edit URL. +3. Normalises casing: ALL CAPS filenames (e.g. `CONTRIBUTING.md`) and headings become Title Case (`Contributing`); known acronyms (API, OSCAL, CLI, …) are preserved. +4. Shifts all Markdown headings down one level (H1→H2, H2→H3, …) so Hugo's page title is the sole H1. +5. Strips CI badge lines from the top of the README. +6. Rewrites relative Markdown links and images to absolute GitHub URLs. +7. Scans for doc pages under configurable `scan_paths` (e.g. `docs/`). +8. Builds a `ProjectCard` for the landing page. + +After processing all repos, the tool writes `data/projects.json` — an array of +`ProjectCard` objects that Hugo templates use to render the "Our Projects" section. 
+ +### Phase 2: Config Sync (opt-in) + +Reads `sync-config.yaml` and pulls specific files with per-file transforms: + +- **Frontmatter injection** — prepend YAML frontmatter with arbitrary key-value + pairs, or replace existing frontmatter. +- **Link rewriting** — convert relative Markdown links to absolute GitHub blob + URLs and relative images to raw.githubusercontent URLs. +- **Badge stripping** — remove CI/status badge lines from the top of content. + +Config sources can operate alongside or instead of the org scan per-repo: + +| `skip_org_sync` | Org scan page | Config files | ProjectCard | +|-----------------|---------------|--------------|-------------| +| `false` (default) | Generated from README | Synced as additional content | Yes | +| `true` | Suppressed | Synced as primary content | Yes | + +## Quick Start + +### Prerequisites + +- **Go 1.25+** — the sync tool is pure Go with one dependency (`gopkg.in/yaml.v3`) +- **Node.js 22+** — for the Hugo/Doks theme build (`npm ci`) +- **Hugo extended** — the static site generator +- **`GITHUB_TOKEN`** (recommended) — unauthenticated rate limit is 60 requests/hour + +### 1. Dry-run (preview without writing) + +```bash +go run ./cmd/sync-content --org complytime --config sync-config.yaml +``` + +Logs every action the tool would take but creates zero files. This is the default +mode — you must explicitly opt in to writes. + +### 2. Write mode (generate content) + +```bash +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write +``` + +Produces: + +| Output | Path | +|--------|------| +| Per-repo section index | `content/docs/projects/{repo}/_index.md` | +| Per-repo README page | `content/docs/projects/{repo}/overview.md` | +| Auto-discovered doc pages | `content/docs/projects/{repo}/*.md` | +| Landing page card data | `data/projects.json` | +| Sync manifest | `.sync-manifest.json` | +| Content lockfile (with `--update-lock`) | `.content-lock.json` | + +### 3. 
Start Hugo + +```bash +npm run dev +``` + +Navigate to `http://localhost:1313/`. Project pages appear at `/docs/projects/`. + +### 4. Build for production + +```bash +# Local dev (fetches HEAD): +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write + +# Production (fetches at approved SHAs): +go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write + +hugo --minify --gc +``` + +Output is in `public/`. The `--lock` flag ensures content matches the approved +SHAs in `.content-lock.json`. Omit it for local development to fetch latest HEAD. + +## CLI Reference + +| Flag | Default | Description | +|------|---------|-------------| +| `--org` | `complytime` | GitHub organization (reads `peribolos.yaml` from `{org}/.github` repo) | +| `--token` | `$GITHUB_TOKEN` | GitHub API token (or set the env var) | +| `--config` | _(none)_ | Path to `sync-config.yaml` for config-driven file syncs | +| `--write` | `false` | Apply changes to disk (without this flag, everything is a dry-run) | +| `--output` | `.` | Hugo site root directory | +| `--repo` | _(none)_ | Sync only this repo, e.g. 
`complytime/complyctl` | +| `--include` | _(all)_ | Comma-separated repo allowlist (empty = all eligible repos) | +| `--exclude` | _(see config)_ | Comma-separated repo names to skip; merged with `discovery.ignore_repos` in `sync-config.yaml` | +| `--workers` | `5` | Maximum concurrent repo processing goroutines | +| `--timeout` | `3m` | Overall timeout for all API operations | +| `--summary` | _(none)_ | Write a Markdown change summary to this file (for PR bodies) | +| `--lock` | _(none)_ | Path to `.content-lock.json` for content approval gating | +| `--update-lock` | `false` | Write current upstream SHAs to the lockfile (requires `--lock`) | + +## Common Tasks + +### Sync a single repository + +```bash +go run ./cmd/sync-content --repo complytime/complyctl --config sync-config.yaml --write +``` + +### Generate a change summary for PR review + +```bash +go run ./cmd/sync-content --org complytime --config sync-config.yaml --write \ + --summary sync-report.md +``` + +The summary file contains a Markdown report with new/updated/removed repos and +stats. + +### Increase concurrency for faster syncs + +```bash +go run ./cmd/sync-content --org complytime --workers 10 --write +``` + +## Configuration + +The config file `sync-config.yaml` lives at the repository root. It has three +sections: + +### `defaults` + +Fallback values applied to every source unless overridden. + +```yaml +defaults: + branch: main +``` + +### `discovery` + +Controls repo filtering and automatic doc page scanning. + +```yaml +discovery: + ignore_repos: + - .github # repo names to exclude from sync + - website + scan_paths: + - docs # directories to scan for .md files + ignore_files: + - CHANGELOG.md # filenames to skip during scanning + - CODE_OF_CONDUCT.md +``` + +`ignore_repos` filters repos out of the peribolos-driven list. 
When `scan_paths` +is set, the tool recursively lists `.md` files under each path for every eligible +repo and syncs them as doc pages at +`content/docs/projects/{repo}/{relative-path}`. Files already declared in +`sources` or listed in `ignore_files` are skipped. + +### `sources` + +Declares specific files to sync with fine-grained control. + +```yaml +sources: + - repo: complytime/complyctl + branch: main # optional, inherits from defaults + skip_org_sync: true # suppress auto-generated README page + files: + - src: README.md + dest: content/docs/projects/complyctl/_index.md + transform: + inject_frontmatter: + title: "complyctl" + description: "A compliance CLI tool." + weight: 10 + rewrite_links: true + strip_badges: true + + - src: docs/QUICK_START.md + dest: content/docs/projects/complyctl/quick-start.md + transform: + inject_frontmatter: + title: "Quick Start" + description: "Getting started with complyctl." + weight: 20 + rewrite_links: true +``` + +Each `files` entry maps one upstream file (`src`) to one local destination +(`dest`) with optional transforms. 
+ +## Architecture + +### Data Flow + +``` +GitHub REST API + │ + ├─ GET /repos/{org}/.github/contents/peribolos.yaml → governance registry + ├─ GET /repos/{owner}/{repo} → per-repo metadata enrichment + ├─ GET /repos/{owner}/{repo}/readme → fetch README content + SHA + ├─ GET /repos/{owner}/{repo}/branches/{branch} → branch HEAD SHA + ├─ GET /repos/{owner}/{repo}/contents/{path} → fetch config-declared files + └─ GET /repos/{owner}/{repo}/contents/{dir} → list docs/ for doc page scanning + │ + ▼ + ┌─────────────────────────────────────────────┐ + │ sync-content │ + │ │ + │ Governance Discovery ──┐ │ + │ • read peribolos.yaml│ │ + │ • enrich via API ├─→ Project Pages │ + │ • fetch READMEs │ ProjectCards │ + │ • scan doc pages │ │ + │ │ │ + │ Config Sync ───────────┤ │ + │ • fetch declared ├─→ Config Files │ + │ files │ (with transforms)│ + │ • apply transforms │ │ + │ │ │ + │ Change Detection ──────┤ │ + │ • branch SHA cache ├─→ Skip unchanged │ + │ • README blob SHA │ │ + │ • byte-level dedup │ │ + │ │ │ + │ Orphan Cleanup ────────┘ │ + │ • manifest diffing ──→ Remove stale │ + │ • empty dir pruning files │ + └─────────────────────────────────────────────┘ + │ + ▼ + Hugo Content Tree + ├─ content/docs/projects/{repo}/_index.md (section index) + ├─ content/docs/projects/{repo}/overview.md (README content) + ├─ content/docs/projects/{repo}/*.md (discovered docs) + ├─ data/projects.json + ├─ .sync-manifest.json + └─ .content-lock.json (committed, updated by --update-lock) +``` + +### Key Design Decisions + +**Dry-run by default.** The tool never writes to disk unless `--write` is passed. +This makes it safe to run in CI for validation and locally for exploration. + +**Two-tier change detection.** On each run the tool reads `source_sha` and +`readme_sha` from existing project page frontmatter. If the branch HEAD SHA +hasn't changed, all fetches for that repo are skipped (fast path). 
If the branch +moved but the README blob SHA is identical, the repo is classified as unchanged. +This minimizes API calls and disk writes. + +**Manifest-based orphan cleanup.** A `.sync-manifest.json` file tracks every file +written during a sync run. On the next run, files in the old manifest but absent +from the current run are deleted, and empty parent directories are pruned. This +handles repos being renamed or removed from peribolos. + +**Idempotent writes.** Before writing a file, the tool reads the existing content +and compares bytes. If identical, the write is skipped entirely. This means +running the tool twice in succession produces zero disk I/O on the second run. + +**Provenance comments.** Every synced file includes an HTML comment after the +frontmatter: + +``` + +``` + +This makes it trivial to trace any page back to its upstream source and commit. + +**Bounded concurrency with rate-limit awareness.** A worker pool (default 5, +configurable via `--workers`) processes repos concurrently. The API client retries +on HTTP 403/429 with exponential backoff, respecting `Retry-After` and +`X-RateLimit-Reset` headers. A global context timeout (default 3 minutes) prevents +runaway execution. + +**Content approval gate.** A committed `.content-lock.json` file pins each repo +to an approved branch SHA. The deploy workflow fetches content at locked SHAs — +not HEAD. A weekly check workflow (`sync-content-check.yml`) detects upstream +changes and opens a PR to update the lockfile. This prevents broken or +undesirable content from reaching production without human review. + +**Single package, single dependency.** The entire tool lives in `package main` within `cmd/sync-content/` — domain-organised source files, one third-party dependency (`gopkg.in/yaml.v3`). No separate packages, no interfaces, no abstractions beyond what the problem requires. 
+ +### Output Entities + +#### ProjectCard (`data/projects.json`) + +```json +{ + "name": "complyctl", + "language": "Go", + "type": "CLI Tool", + "description": "A compliance CLI tool for Kubernetes.", + "url": "/docs/projects/complyctl/", + "repo": "https://github.com/complytime/complyctl", + "stars": 42 +} +``` + +The `type` field is derived from repo topics and description using keyword +matching: + +| Keywords | Type | +|----------|------| +| `cli` topic, "command-line" or " cli" in description | CLI Tool | +| `automation` topic, "automat" in description | Automation | +| `observability` topic, "observability" or "collector" in description | Observability | +| `framework` topic, "framework" or "bridging" in description | Framework | +| _(default)_ | Library | + +#### Section Index Frontmatter (`_index.md`) + +```yaml +--- +title: "Complyctl" +linkTitle: "complyctl" +description: "A compliance CLI tool for Kubernetes." +date: 2026-03-10T18:30:00Z +lastmod: 2026-03-10T18:30:00Z +draft: false +toc: false +params: + language: "Go" + stars: 42 + repo: "https://github.com/complytime/complyctl" + source_sha: "abc123def456" + readme_sha: "def789abc012" + seo: + title: "Complyctl | ComplyTime" + description: "A compliance CLI tool for Kubernetes." +--- +``` + +#### Overview Page Frontmatter (`overview.md`) + +```yaml +--- +title: "Overview" +description: "A compliance CLI tool for Kubernetes." +date: 2026-03-10T18:30:00Z +lastmod: 2026-03-10T18:30:00Z +draft: false +toc: true +weight: 1 +params: + editURL: "https://github.com/complytime/complyctl/edit/main/README.md" +--- +``` + +#### Auto-Discovered Doc Page Frontmatter + +```yaml +--- +title: "Quick Start" +description: "A compliance CLI tool for Kubernetes. 
— Quick Start" +date: 2026-03-10T18:30:00Z +lastmod: 2026-03-10T18:30:00Z +draft: false +weight: 10 +params: + editURL: "https://github.com/complytime/complyctl/edit/main/docs/quick-start.md" +--- + +``` + +### Content Transforms + +| Transform | What it does | +|-----------|-------------| +| `stripLeadingH1` | Removes the first H1 heading from the content body — the title is already captured in frontmatter, so the leading H1 would be a duplicate | +| `shiftHeadings` | Bumps every Markdown heading down one level (H1→H2, H2→H3, …) so Hugo's page title is the sole H1 | +| `titleCaseHeadings` | Applies acronym-aware Title Case to all in-page heading text (e.g. `## getting started` → `## Getting Started`, `## api reference` → `## API Reference`, `## CONTRIBUTING` → `## Contributing`); normalises ALL CAPS words while preserving known acronyms; ensures page headings and Hugo's TableOfContents match | +| `stripBadges` | Removes `[![alt](img)](link)` badge patterns from the start of content | +| `rewriteRelativeLinks` | Converts `[text](path)` to `[text](https://github.com/.../blob/main/path)` and `![alt](img)` to `![alt](https://raw.githubusercontent.com/.../img)` | +| `injectFrontmatter` | Prepends or replaces YAML frontmatter with declared key-value pairs | + +## CI/CD Integration + +### Three-Workflow Model + +The tool integrates with three GitHub Actions workflows (Constitution XV v1.3.0): + +**1. CI (`ci.yml`)** — PR validation (syncs content and builds the site to catch breakage): + +```yaml +- name: Sync content + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +``` + +**2. 
Content Sync Check (`sync-content-check.yml`)** — weekly upstream detection: + +```yaml +- name: Check for upstream changes + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --update-lock --summary sync-summary.md +``` + +Checks upstream SHAs and creates/updates a PR with lockfile changes when content has moved. Since peribolos provides the authoritative repo list, separate discovery is unnecessary. + +**3. Deploy (`deploy-gh-pages.yml`)** — production build: + +```yaml +- name: Sync content + run: go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +- name: Build site + run: hugo --minify --gc +``` + +Upstream content changes require a reviewed PR before reaching production — no +unreviewed content is deployed. + +### Structured Outputs + +When running in GitHub Actions, the tool writes structured data to +`$GITHUB_OUTPUT` and `$GITHUB_STEP_SUMMARY`: + +**`GITHUB_OUTPUT`:** + +``` +has_changes=true +changed_count=3 +error_count=0 +``` + +**`GITHUB_STEP_SUMMARY`:** A Markdown table with new/updated/removed repos and +sync stats. + +**`--summary` flag:** Writes the same Markdown report to a file, useful for +automated PR body generation. + +### Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success (all repos synced or dry-run complete) | +| 1 | One or more errors occurred (API failures, write errors) | + +## Testing + +Tests are split across 10 `*_test.go` files that mirror the source files. A +shared `helpers_test.go` provides common utilities. + +```bash +# Run all tests +go test ./cmd/sync-content/... + +# Run with race detector +go test -race ./cmd/sync-content/... + +# Run with verbose output +go test -v ./cmd/sync-content/... 
+``` + +### Test Coverage + +| Category | What's tested | +|----------|---------------| +| Config loading | Valid YAML, malformed YAML, missing file, default values, missing required fields | +| Frontmatter injection | Prepend to bare content, replace existing frontmatter, empty content | +| Badge stripping | Line-start badges removed, inline badges preserved, no-badge passthrough | +| Heading shifting | All headings bumped down one level (H1→H2, H2→H3, …) so Hugo page title is the sole H1 | +| Heading casing | ALL CAPS normalised to Title Case, acronyms preserved, mixed-case normalised, multi-word headings | +| Title from filename | ALL CAPS filenames (`CONTRIBUTING.md` → `Contributing`), hyphen/underscore splitting, acronym preservation | +| Link rewriting | Relative to absolute, images to raw URLs, absolute URLs unchanged, anchors unchanged, `./` prefix | +| Repo name validation | Valid names, empty, `.`, `..`, path separators | +| `processRepo` integration | Mock API server, project page written with correct frontmatter, headings shifted, README SHA recorded | +| Branch-unchanged fast path | No README fetch when branch SHA matches, manifest carry-forward | +| Branch-changed README-unchanged | Two-tier detection classifies as unchanged | +| `syncConfigSource` | All transforms applied, provenance comment inserted, dry-run writes nothing | +| Doc page scanning | Auto-syncs `docs/*.md`, skips config-tracked files, generates section indexes | +| Manifest round-trip | Write and read manifest, orphan cleanup, empty directory pruning | +| Concurrent access | Race-safe `syncResult` mutations, concurrent `recordFile` | +| Peribolos integration | Governance registry fetch, repo validation, missing org handling | + +All integration tests use `net/http/httptest` to mock the GitHub API. No real API +calls are made during testing. 
+ +## File Inventory + +``` +cmd/sync-content/ +├── main.go # Entry point and orchestration (~440 lines) +├── config.go # Config types and loading +├── github.go # GitHub API client and types +├── transform.go # Markdown transforms (links, badges, frontmatter) +├── hugo.go # Hugo page and card generation +├── sync.go # Sync logic, result tracking, repo processing +├── manifest.go # Manifest I/O and state tracking +├── cleanup.go # Orphan and stale content removal +├── path.go # Path validation utilities +├── lock.go # Content lockfile read/write/query +├── *_test.go # Tests mirror source files (10 files) +└── README.md # This file + +sync-config.yaml # Declarative sync config (repo root) +.content-lock.json # Approved upstream SHAs per repo (committed) +go.mod # Go module: github.com/complytime/website +go.sum # Dependency checksums +``` + +### Generated Files (gitignored, not committed) + +``` +content/docs/projects/{repo}/_index.md # Section index (metadata only) +content/docs/projects/{repo}/overview.md # README content page +content/docs/projects/{repo}/*.md # Auto-discovered doc pages +data/projects.json # Landing page card data +.sync-manifest.json # Orphan tracking manifest +``` + +## License + +SPDX-License-Identifier: Apache-2.0 diff --git a/cmd/sync-content/cleanup.go b/cmd/sync-content/cleanup.go new file mode 100644 index 0000000..9aea47a --- /dev/null +++ b/cmd/sync-content/cleanup.go @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "log/slog" + "os" + "path/filepath" +) + +// cleanOrphanedFiles removes files present in the old manifest but absent from +// the current sync run. After each removal it prunes empty parent directories +// up to outputDir. 
func cleanOrphanedFiles(outputDir string, oldManifest map[string]bool, currentFiles []string) int {
	// Index the files written during this run; anything in the old manifest
	// that is not in this set is an orphan to remove.
	current := make(map[string]bool, len(currentFiles))
	for _, f := range currentFiles {
		current[f] = true
	}
	removed := 0
	for relPath := range oldManifest {
		if current[relPath] {
			continue
		}
		fullPath := filepath.Join(outputDir, relPath)
		// Path-traversal guard: never delete anything that resolves outside
		// outputDir (e.g. a manifest entry containing "..").
		if !isUnderDir(outputDir, fullPath) {
			slog.Warn("skipping orphaned file outside output dir", "path", relPath)
			continue
		}
		if err := os.Remove(fullPath); err != nil {
			// Already-gone files are fine; anything else is logged but does
			// not abort the cleanup of the remaining orphans.
			if !os.IsNotExist(err) {
				slog.Warn("could not remove orphaned file", "path", fullPath, "error", err)
			}
			continue
		}
		slog.Info("removed orphaned file", "path", relPath)
		removed++
		// Prune now-empty ancestor directories, walking upward but never past
		// outputDir itself. os.Remove fails on a non-empty directory, which
		// cleanly terminates the walk.
		dir := filepath.Dir(fullPath)
		absOutput := filepath.Clean(outputDir)
		for dir != absOutput && dir != "." && dir != "/" {
			if !isUnderDir(outputDir, dir) {
				break
			}
			if err := os.Remove(dir); err != nil {
				break
			}
			slog.Info("removed empty directory", "path", dir)
			dir = filepath.Dir(dir)
		}
	}
	return removed
}
diff --git a/cmd/sync-content/cleanup_test.go b/cmd/sync-content/cleanup_test.go
new file mode 100644
index 0000000..62f8ad7
--- /dev/null
+++ b/cmd/sync-content/cleanup_test.go
@@ -0,0 +1,130 @@
// SPDX-License-Identifier: Apache-2.0
package main

import (
	"os"
	"path/filepath"
	"testing"
)

func TestCleanOrphanedFiles(t *testing.T) {
	dir := t.TempDir()

	staleFile := filepath.Join(dir, "content", "docs", "projects", "complyctl", "quick-start.md")
	keptFile := filepath.Join(dir, "content", "docs", "projects", "complyctl", "_index.md")
	otherFile := filepath.Join(dir, "content", "docs", "projects", "complyscribe", "_index.md")

	for _, f := range []string{staleFile, keptFile, otherFile} {
		if err := os.MkdirAll(filepath.Dir(f), 0o755); err != nil {
			t.Fatalf("MkdirAll: %v", err)
		}
		if err := os.WriteFile(f, []byte("test"), 0o600); err != nil {
			t.Fatalf("WriteFile: %v", err)
		}
	}

	oldManifest :=
map[string]bool{ + "content/docs/projects/complyctl/_index.md": true, + "content/docs/projects/complyctl/quick-start.md": true, + "content/docs/projects/complyscribe/_index.md": true, + } + + currentFiles := []string{ + "content/docs/projects/complyctl/_index.md", + "content/docs/projects/complyscribe/_index.md", + } + + removed := cleanOrphanedFiles(dir, oldManifest, currentFiles) + + if removed != 1 { + t.Errorf("removed = %d, want 1", removed) + } + if _, err := os.Stat(staleFile); !os.IsNotExist(err) { + t.Error("stale file quick-start.md should have been removed") + } + if _, err := os.Stat(keptFile); err != nil { + t.Error("kept file _index.md should still exist") + } + if _, err := os.Stat(otherFile); err != nil { + t.Error("other repo file should still exist") + } +} + +func TestCleanOrphanedFiles_PrunesEmptyDirs(t *testing.T) { + dir := t.TempDir() + + staleDir := filepath.Join(dir, "content", "docs", "projects", "removed-repo") + staleFile := filepath.Join(staleDir, "_index.md") + if err := os.MkdirAll(staleDir, 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(staleFile, []byte("test"), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + oldManifest := map[string]bool{ + "content/docs/projects/removed-repo/_index.md": true, + } + + removed := cleanOrphanedFiles(dir, oldManifest, nil) + + if removed != 1 { + t.Errorf("removed = %d, want 1", removed) + } + if _, err := os.Stat(staleDir); !os.IsNotExist(err) { + t.Error("empty directory should have been pruned") + } +} + +func TestCleanOrphanedFiles_TraversalBlocked(t *testing.T) { + dir := t.TempDir() + + outsideDir := t.TempDir() + outsideFile := filepath.Join(outsideDir, "should-survive.txt") + if err := os.WriteFile(outsideFile, []byte("protected"), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + relTraversal, err := filepath.Rel(dir, outsideFile) + if err != nil { + t.Fatalf("could not compute relative path: %v", err) + } + + oldManifest := 
map[string]bool{ + relTraversal: true, + } + + removed := cleanOrphanedFiles(dir, oldManifest, nil) + + if removed != 0 { + t.Errorf("removed = %d, want 0 (traversal should be blocked)", removed) + } + if _, err := os.Stat(outsideFile); err != nil { + t.Errorf("file outside output dir was deleted: %v", err) + } +} + +func TestCleanOrphanedFiles_LegitimateRemoval(t *testing.T) { + dir := t.TempDir() + + legitFile := filepath.Join(dir, "content", "docs", "projects", "old-repo", "_index.md") + if err := os.MkdirAll(filepath.Dir(legitFile), 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + if err := os.WriteFile(legitFile, []byte("stale"), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + oldManifest := map[string]bool{ + "content/docs/projects/old-repo/_index.md": true, + } + + removed := cleanOrphanedFiles(dir, oldManifest, nil) + + if removed != 1 { + t.Errorf("removed = %d, want 1 (legitimate orphan should be cleaned)", removed) + } + if _, err := os.Stat(legitFile); !os.IsNotExist(err) { + t.Error("legitimate orphan should have been removed") + } +} diff --git a/cmd/sync-content/config.go b/cmd/sync-content/config.go new file mode 100644 index 0000000..c0c6346 --- /dev/null +++ b/cmd/sync-content/config.go @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +// PeribolosConfig is the top-level structure parsed from peribolos.yaml +// in the org's .github repo. +type PeribolosConfig struct { + Orgs map[string]PeribolosOrg `yaml:"orgs"` +} + +// PeribolosOrg represents an organization entry in peribolos.yaml. +type PeribolosOrg struct { + Repos map[string]PeribolosRepo `yaml:"repos"` +} + +// PeribolosRepo holds per-repo metadata from peribolos.yaml. +type PeribolosRepo struct { + Description string `yaml:"description"` + DefaultBranch string `yaml:"default_branch"` +} + +// SyncConfig is the top-level structure parsed from sync-config.yaml. 
type SyncConfig struct {
	Defaults  Defaults  `yaml:"defaults"`  // fallback values applied to every source
	Sources   []Source  `yaml:"sources"`   // explicitly declared upstream repos/files
	Discovery Discovery `yaml:"discovery"` // knobs for automatic repo/doc discovery
}

// Discovery configures automatic detection of new repos and doc files
// that are not yet declared in sources.
type Discovery struct {
	IgnoreRepos []string `yaml:"ignore_repos"` // repo names excluded from discovery
	IgnoreFiles []string `yaml:"ignore_files"` // file paths excluded from doc scanning
	ScanPaths   []string `yaml:"scan_paths"`   // repo directories scanned for docs
}

// Defaults holds fallback values applied to every source unless overridden.
type Defaults struct {
	// Branch is the branch used when a source omits its own; loadConfig
	// falls back to "main" when this is also empty.
	Branch string `yaml:"branch"`
}

// Source is a single upstream repository declared in the config file.
type Source struct {
	Repo   string `yaml:"repo"`   // required; "owner/name" form per loadConfig's error message
	Branch string `yaml:"branch"` // optional; inherits Defaults.Branch when empty
	// SkipOrgSync presumably excludes this repo from the org-wide governance
	// sync — NOTE(review): confirm against the sync logic, not visible here.
	SkipOrgSync bool       `yaml:"skip_org_sync"`
	Files       []FileSpec `yaml:"files"` // files to fetch from this repo
}

// FileSpec describes one file to fetch from a source repo and where to place it.
type FileSpec struct {
	Src       string    `yaml:"src"`       // required; path within the source repo
	Dest      string    `yaml:"dest"`      // required; destination path in this repo
	Transform Transform `yaml:"transform"` // optional content mutations
}

// Transform describes optional mutations applied to fetched content.
type Transform struct {
	InjectFrontmatter map[string]any `yaml:"inject_frontmatter"` // frontmatter keys to prepend/replace
	RewriteLinks      bool           `yaml:"rewrite_links"`      // rewrite relative links to absolute GitHub URLs
	StripBadges       bool           `yaml:"strip_badges"`       // drop leading badge images
}

// loadConfig reads a sync-config.yaml file and returns the parsed configuration.
// It applies default values (e.g. branch) and validates that every source has
// the required fields.
+func loadConfig(path string) (*SyncConfig, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading config %s: %w", path, err) + } + + var cfg SyncConfig + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("parsing config %s: %w", path, err) + } + + if cfg.Defaults.Branch == "" { + cfg.Defaults.Branch = "main" + } + + for i := range cfg.Sources { + src := &cfg.Sources[i] + if src.Repo == "" { + return nil, fmt.Errorf("config %s: source[%d] missing required field 'repo'", path, i) + } + if src.Branch == "" { + src.Branch = cfg.Defaults.Branch + } + for j, f := range src.Files { + if f.Src == "" { + return nil, fmt.Errorf("config %s: source[%d] (%s) file[%d] missing 'src'", path, i, src.Repo, j) + } + if f.Dest == "" { + return nil, fmt.Errorf("config %s: source[%d] (%s) file[%d] missing 'dest'", path, i, src.Repo, j) + } + } + } + + return &cfg, nil +} diff --git a/cmd/sync-content/config_test.go b/cmd/sync-content/config_test.go new file mode 100644 index 0000000..72ca3a3 --- /dev/null +++ b/cmd/sync-content/config_test.go @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestLoadConfig(t *testing.T) { + t.Run("valid config", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "sync-config.yaml") + if err := os.WriteFile(path, []byte(` +defaults: + branch: main +sources: + - repo: org/repo1 + files: + - src: README.md + dest: content/docs/projects/repo1/_index.md +`), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + cfg, err := loadConfig(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Defaults.Branch != "main" { + t.Errorf("branch = %q, want %q", cfg.Defaults.Branch, "main") + } + if len(cfg.Sources) != 1 { + t.Fatalf("sources count = %d, want 1", len(cfg.Sources)) + } + if cfg.Sources[0].Repo != "org/repo1" { + t.Errorf("repo = %q, want %q", 
cfg.Sources[0].Repo, "org/repo1") + } + if cfg.Sources[0].Branch != "main" { + t.Errorf("source branch = %q, want %q (inherited from defaults)", cfg.Sources[0].Branch, "main") + } + }) + + t.Run("default branch applied", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "cfg.yaml") + if err := os.WriteFile(path, []byte(` +sources: + - repo: org/repo1 + files: + - src: README.md + dest: out/README.md +`), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + cfg, err := loadConfig(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Defaults.Branch != "main" { + t.Errorf("default branch = %q, want %q", cfg.Defaults.Branch, "main") + } + }) + + t.Run("malformed YAML", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "bad.yaml") + if err := os.WriteFile(path, []byte(`{{{not yaml`), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + _, err := loadConfig(path) + if err == nil { + t.Fatal("expected error for malformed YAML") + } + }) + + t.Run("missing file", func(t *testing.T) { + _, err := loadConfig("/nonexistent/path.yaml") + if err == nil { + t.Fatal("expected error for missing file") + } + }) + + t.Run("missing repo field", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "cfg.yaml") + if err := os.WriteFile(path, []byte(` +sources: + - files: + - src: README.md + dest: out/README.md +`), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + _, err := loadConfig(path) + if err == nil { + t.Fatal("expected error for missing repo") + } + if !strings.Contains(err.Error(), "missing required field 'repo'") { + t.Errorf("error = %q, want it to mention missing repo", err) + } + }) + + t.Run("missing src field", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "cfg.yaml") + if err := os.WriteFile(path, []byte(` +sources: + - repo: org/repo1 + files: + - dest: out/README.md +`), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + 
+ _, err := loadConfig(path) + if err == nil { + t.Fatal("expected error for missing src") + } + if !strings.Contains(err.Error(), "missing 'src'") { + t.Errorf("error = %q, want it to mention missing src", err) + } + }) +} diff --git a/cmd/sync-content/github.go b/cmd/sync-content/github.go new file mode 100644 index 0000000..4aa57e8 --- /dev/null +++ b/cmd/sync-content/github.go @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "net/url" + "sort" + "strconv" + "strings" + "time" + + "gopkg.in/yaml.v3" +) + +const ( + githubAPI = "https://api.github.com" + maxRetries = 3 + maxResponseBytes = 10 << 20 // 10 MB safety ceiling for API response bodies + maxDirDepth = 10 +) + +// GitHub API response types + +type Repo struct { + Name string `json:"name"` + FullName string `json:"full_name"` + Description string `json:"description"` + Language string `json:"language"` + StargazersCount int `json:"stargazers_count"` + HTMLURL string `json:"html_url"` + DefaultBranch string `json:"default_branch"` + PushedAt string `json:"pushed_at"` + Topics []string `json:"topics"` +} + +type FileResponse struct { + Content string `json:"content"` + Encoding string `json:"encoding"` + SHA string `json:"sha"` +} + +type DirEntry struct { + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` +} + +type BranchResponse struct { + Commit struct { + SHA string `json:"sha"` + } `json:"commit"` +} + +// apiClient wraps net/http for authenticated GitHub REST API calls. 
+type apiClient struct { + token string + http *http.Client +} + +func (c *apiClient) do(ctx context.Context, url string) (*http.Response, error) { + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return nil, err + } + req.Header.Set("Accept", "application/vnd.github.v3+json") + if c.token != "" { + req.Header.Set("Authorization", "Bearer "+c.token) + } + return c.http.Do(req) +} + +// getJSON fetches a URL and decodes JSON, retrying on rate limit (403/429) +// with exponential backoff and respect for Retry-After / X-RateLimit-Reset. +func (c *apiClient) getJSON(ctx context.Context, url string, dst any) error { + var lastErr error + for attempt := range maxRetries + 1 { + resp, err := c.do(ctx, url) + if err != nil { + return err + } + + if resp.StatusCode == http.StatusOK { + limited := io.LimitReader(resp.Body, maxResponseBytes) + err = json.NewDecoder(limited).Decode(dst) + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + return err + } + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + _ = resp.Body.Close() + lastErr = fmt.Errorf("GET %s: %d %s", url, resp.StatusCode, body) + + if !isRateLimited(resp) || attempt == maxRetries { + return lastErr + } + + wait := retryWait(resp, attempt) + slog.Warn("rate limited, retrying", "url", url, "attempt", attempt+1, "wait", wait.Round(time.Second)) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(wait): + } + } + return lastErr +} + +func isRateLimited(resp *http.Response) bool { + if resp.StatusCode == http.StatusTooManyRequests { + return true + } + if resp.StatusCode == http.StatusForbidden { + return resp.Header.Get("X-RateLimit-Remaining") == "0" + } + return false +} + +func retryWait(resp *http.Response, attempt int) time.Duration { + if ra := resp.Header.Get("Retry-After"); ra != "" { + if seconds, err := strconv.Atoi(ra); err == nil { + return time.Duration(seconds) * time.Second + } + } + if reset := 
resp.Header.Get("X-RateLimit-Reset"); reset != "" { + if ts, err := strconv.ParseInt(reset, 10, 64); err == nil { + wait := time.Until(time.Unix(ts, 0)) + time.Second + if wait > 0 && wait < 5*time.Minute { + return wait + } + if wait >= 5*time.Minute { + slog.Warn("rate limit reset too far in future, using backoff", "reset_in", wait.Round(time.Second)) + } + } + } + shift := attempt + if shift < 0 { + shift = 0 + } + if shift > 5 { + shift = 5 + } + return time.Duration(1<.repos. +func (c *apiClient) fetchPeribolosRepos(ctx context.Context, org string) ([]string, error) { + apiURL := fmt.Sprintf("%s/repos/%s/.github/contents/peribolos.yaml", + githubAPI, url.PathEscape(org)) + var f FileResponse + if err := c.getJSON(ctx, apiURL, &f); err != nil { + return nil, fmt.Errorf("fetching peribolos.yaml from %s/.github: %w", org, err) + } + content, err := decodeContent(f) + if err != nil { + return nil, fmt.Errorf("decoding peribolos.yaml: %w", err) + } + + var pc PeribolosConfig + if err := yaml.Unmarshal([]byte(content), &pc); err != nil { + return nil, fmt.Errorf("parsing peribolos.yaml: %w", err) + } + + orgData, ok := pc.Orgs[org] + if !ok { + return nil, fmt.Errorf("peribolos.yaml has no entry for org %q", org) + } + + names := make([]string, 0, len(orgData.Repos)) + for name := range orgData.Repos { + names = append(names, name) + } + sort.Strings(names) + return names, nil +} + +// getRepoMetadata fetches full metadata for a single repo from the GitHub API. 
+func (c *apiClient) getRepoMetadata(ctx context.Context, owner, name string) (*Repo, error) { + apiURL := fmt.Sprintf("%s/repos/%s/%s", + githubAPI, url.PathEscape(owner), url.PathEscape(name)) + var repo Repo + if err := c.getJSON(ctx, apiURL, &repo); err != nil { + return nil, err + } + return &repo, nil +} + +func (c *apiClient) getREADME(ctx context.Context, owner, repo, ref string) (string, string, error) { + apiURL := fmt.Sprintf("%s/repos/%s/%s/readme", + githubAPI, url.PathEscape(owner), url.PathEscape(repo)) + apiURL = appendRef(apiURL, ref) + var f FileResponse + if err := c.getJSON(ctx, apiURL, &f); err != nil { + return "", "", err + } + content, err := decodeContent(f) + return content, f.SHA, err +} + +func (c *apiClient) getFileContent(ctx context.Context, owner, repo, path, ref string) (string, string, error) { + apiURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s", + githubAPI, url.PathEscape(owner), url.PathEscape(repo), escapePathSegments(path)) + apiURL = appendRef(apiURL, ref) + var f FileResponse + if err := c.getJSON(ctx, apiURL, &f); err != nil { + return "", "", err + } + content, err := decodeContent(f) + return content, f.SHA, err +} + +func (c *apiClient) listDir(ctx context.Context, owner, repo, path, ref string) ([]DirEntry, error) { + apiURL := fmt.Sprintf("%s/repos/%s/%s/contents/%s", + githubAPI, url.PathEscape(owner), url.PathEscape(repo), escapePathSegments(path)) + apiURL = appendRef(apiURL, ref) + var entries []DirEntry + if err := c.getJSON(ctx, apiURL, &entries); err != nil { + return nil, err + } + return entries, nil +} + +func (c *apiClient) getBranchSHA(ctx context.Context, owner, repo, branch string) (string, error) { + apiURL := fmt.Sprintf("%s/repos/%s/%s/branches/%s", + githubAPI, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(branch)) + var b BranchResponse + if err := c.getJSON(ctx, apiURL, &b); err != nil { + return "", err + } + return b.Commit.SHA, nil +} + +// listDirMD recursively lists .md files 
under a directory, reusing listDir. +// Returns paths relative to the repo root (e.g. "docs/guide.md"). +// Recursion is bounded to maxDirDepth levels to limit API calls on deeply +// nested repositories. +func (c *apiClient) listDirMD(ctx context.Context, owner, repo, dir, ref string) ([]string, error) { + return c.listDirMDDepth(ctx, owner, repo, dir, ref, 0) +} + +func (c *apiClient) listDirMDDepth(ctx context.Context, owner, repo, dir, ref string, depth int) ([]string, error) { + if depth >= maxDirDepth { + slog.Warn("max directory depth reached, skipping deeper levels", "repo", owner+"/"+repo, "dir", dir, "depth", depth) + return nil, nil + } + entries, err := c.listDir(ctx, owner, repo, dir, ref) + if err != nil { + return nil, err + } + var files []string + for _, e := range entries { + switch { + case e.Type == "file" && strings.HasSuffix(e.Name, ".md"): + if e.Path != "" { + files = append(files, e.Path) + } else { + files = append(files, dir+"/"+e.Name) + } + case e.Type == "dir": + subDir := dir + "/" + e.Name + if e.Path != "" { + subDir = e.Path + } + sub, err := c.listDirMDDepth(ctx, owner, repo, subDir, ref, depth+1) + if err != nil { + slog.Warn("could not list subdir", "repo", owner+"/"+repo, "dir", subDir, "error", err) + continue + } + files = append(files, sub...) 
+ } + } + return files, nil +} + +func decodeContent(f FileResponse) (string, error) { + if f.Encoding != "base64" { + return f.Content, nil + } + raw := strings.NewReplacer("\n", "", "\r", "").Replace(f.Content) + decoded, err := base64.StdEncoding.DecodeString(raw) + if err != nil { + return "", fmt.Errorf("base64 decode: %w", err) + } + return string(decoded), nil +} diff --git a/cmd/sync-content/github_test.go b/cmd/sync-content/github_test.go new file mode 100644 index 0000000..a592fba --- /dev/null +++ b/cmd/sync-content/github_test.go @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestListDirMD(t *testing.T) { + mux := http.NewServeMux() + + mux.HandleFunc("/repos/org/repo/contents/docs", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "guide.md", Path: "docs/guide.md", Type: "file"}, + {Name: "image.png", Path: "docs/image.png", Type: "file"}, + {Name: "sub", Path: "docs/sub", Type: "dir"}, + }) + }) + + mux.HandleFunc("/repos/org/repo/contents/docs/sub", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "nested.md", Path: "docs/sub/nested.md", Type: "file"}, + {Name: "data.json", Path: "docs/sub/data.json", Type: "file"}, + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + ctx := context.Background() + + files, err := gh.listDirMD(ctx, "org", "repo", "docs", "") + if err != nil { + t.Fatalf("listDirMD: %v", err) + } + + want := map[string]bool{ + "docs/guide.md": true, + "docs/sub/nested.md": true, + } + got := make(map[string]bool) + for _, f := range files { + got[f] = true + } + + if len(got) != len(want) { + t.Errorf("got %d files, want %d: %v", len(got), len(want), files) + } + for w := range want { + if !got[w] { + t.Errorf("missing expected file %q", w) + } + } 
+} + +func TestListDirMD_DepthLimit(t *testing.T) { + callCount := 0 + + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/repo/contents/", func(w http.ResponseWriter, r *http.Request) { + callCount++ + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "file.md", Path: r.URL.Path[len("/repos/org/repo/contents/"):] + "/file.md", Type: "file"}, + {Name: "deeper", Path: r.URL.Path[len("/repos/org/repo/contents/"):] + "/deeper", Type: "dir"}, + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + ctx := context.Background() + + files, err := gh.listDirMD(ctx, "org", "repo", "docs", "") + if err != nil { + t.Fatalf("listDirMD: %v", err) + } + + if callCount > maxDirDepth+1 { + t.Errorf("API calls = %d, expected at most %d (depth limit should cap recursion)", callCount, maxDirDepth+1) + } + + if len(files) == 0 { + t.Error("expected at least some .md files to be found") + } + if len(files) > maxDirDepth+1 { + t.Errorf("found %d files, expected at most %d", len(files), maxDirDepth+1) + } +} + +func TestEscapePathSegments(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"docs/guide.md", "docs/guide.md"}, + {"docs/my file.md", "docs/my%20file.md"}, + {"path/with spaces/file#1.md", "path/with%20spaces/file%231.md"}, + } + for _, tt := range tests { + got := escapePathSegments(tt.input) + if got != tt.want { + t.Errorf("escapePathSegments(%q) = %q, want %q", tt.input, got, tt.want) + } + } +} + +func TestAppendRef(t *testing.T) { + tests := []struct { + url string + ref string + want string + }{ + {"https://api.github.com/repos/o/r/readme", "", "https://api.github.com/repos/o/r/readme"}, + {"https://api.github.com/repos/o/r/readme", "abc123", "https://api.github.com/repos/o/r/readme?ref=abc123"}, + {"https://api.github.com/repos/o/r/contents/docs?per_page=100", "def456", "https://api.github.com/repos/o/r/contents/docs?per_page=100&ref=def456"}, + } + for _, tt := range tests { + got := 
appendRef(tt.url, tt.ref) + if got != tt.want { + t.Errorf("appendRef(%q, %q) = %q, want %q", tt.url, tt.ref, got, tt.want) + } + } +} + +func TestGetREADME_WithRef(t *testing.T) { + var receivedRef string + + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/repo/readme", func(w http.ResponseWriter, r *http.Request) { + receivedRef = r.URL.Query().Get("ref") + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: "VEVTVA==", + Encoding: "base64", + SHA: "sha123", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + ctx := context.Background() + + _, _, err := gh.getREADME(ctx, "org", "repo", "locked-sha-abc") + if err != nil { + t.Fatalf("getREADME: %v", err) + } + if receivedRef != "locked-sha-abc" { + t.Errorf("ref = %q, want %q", receivedRef, "locked-sha-abc") + } + + receivedRef = "" + _, _, err = gh.getREADME(ctx, "org", "repo", "") + if err != nil { + t.Fatalf("getREADME (no ref): %v", err) + } + if receivedRef != "" { + t.Errorf("ref should be empty when not provided, got %q", receivedRef) + } +} + +func TestListDirMD_WithRef(t *testing.T) { + var receivedRef string + + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/repo/contents/docs", func(w http.ResponseWriter, r *http.Request) { + receivedRef = r.URL.Query().Get("ref") + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "guide.md", Path: "docs/guide.md", Type: "file"}, + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + ctx := context.Background() + + _, err := gh.listDirMD(ctx, "org", "repo", "docs", "pinned-sha") + if err != nil { + t.Fatalf("listDirMD: %v", err) + } + if receivedRef != "pinned-sha" { + t.Errorf("ref = %q, want %q", receivedRef, "pinned-sha") + } +} + +func TestFetchPeribolosRepos(t *testing.T) { + peribolosYAML := `orgs: + myorg: + repos: + alpha: + description: "first repo" + beta: + description: "second repo" + gamma: + description: "third repo" +` + + 
t.Run("success", func(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/repos/myorg/.github/contents/peribolos.yaml", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(peribolosYAML), + Encoding: "base64", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + names, err := gh.fetchPeribolosRepos(context.Background(), "myorg") + if err != nil { + t.Fatalf("fetchPeribolosRepos: %v", err) + } + want := []string{"alpha", "beta", "gamma"} + if len(names) != len(want) { + t.Fatalf("got %d repos, want %d: %v", len(names), len(want), names) + } + for i, name := range names { + if name != want[i] { + t.Errorf("repo[%d] = %q, want %q", i, name, want[i]) + } + } + }) + + t.Run("missing org in peribolos", func(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/repos/otherorg/.github/contents/peribolos.yaml", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(peribolosYAML), + Encoding: "base64", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + _, err := gh.fetchPeribolosRepos(context.Background(), "otherorg") + if err == nil { + t.Fatal("expected error for missing org") + } + }) + + t.Run("peribolos.yaml not found", func(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/repos/noorg/.github/contents/peribolos.yaml", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"message":"Not Found"}`)) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + _, err := gh.fetchPeribolosRepos(context.Background(), "noorg") + if err == nil { + t.Fatal("expected error when peribolos.yaml is missing") + } + }) +} + +func TestGetRepoMetadata(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/myrepo", 
func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(Repo{ + Name: "myrepo", + FullName: "org/myrepo", + Description: "A test repo", + HTMLURL: "https://github.com/org/myrepo", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + repo, err := gh.getRepoMetadata(context.Background(), "org", "myrepo") + if err != nil { + t.Fatalf("getRepoMetadata: %v", err) + } + if repo.Name != "myrepo" { + t.Errorf("name = %q, want %q", repo.Name, "myrepo") + } + if repo.FullName != "org/myrepo" { + t.Errorf("full_name = %q, want %q", repo.FullName, "org/myrepo") + } + if repo.Description != "A test repo" { + t.Errorf("description = %q, want %q", repo.Description, "A test repo") + } +} + +func TestContextCancellationDuringRetry(t *testing.T) { + callCount := 0 + mux := http.NewServeMux() + mux.HandleFunc("/test-endpoint", func(w http.ResponseWriter, r *http.Request) { + callCount++ + w.Header().Set("Retry-After", "60") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"message":"rate limited"}`)) + }) + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(100 * time.Millisecond) + cancel() + }() + + start := time.Now() + var result map[string]any + err := gh.getJSON(ctx, server.URL+"/test-endpoint", &result) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from cancelled context") + } + if elapsed > 2*time.Second { + t.Errorf("cancellation took %v, expected < 2s", elapsed) + } +} diff --git a/cmd/sync-content/helpers_test.go b/cmd/sync-content/helpers_test.go new file mode 100644 index 0000000..52e8ec4 --- /dev/null +++ b/cmd/sync-content/helpers_test.go @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "encoding/base64" + "net/http" + "strings" +) + +// urlRewriter intercepts HTTP requests and 
redirects them to the test server,
+// allowing the apiClient to use its hardcoded githubAPI constant while actually
+// hitting the mock server.
+type urlRewriter struct {
+	targetHost string
+	targetPort string
+}
+
+// RoundTrip redirects the request to the configured test host/port.
+//
+// Per the http.RoundTripper contract, RoundTrip must not modify the caller's
+// request, so the request is cloned before its URL is rewritten.
+func (r *urlRewriter) RoundTrip(req *http.Request) (*http.Response, error) {
+	clone := req.Clone(req.Context())
+	clone.URL.Scheme = "http"
+	clone.URL.Host = r.targetHost + ":" + r.targetPort
+	return http.DefaultTransport.RoundTrip(clone)
+}
+
+// newTestClient builds an apiClient whose transport rewrites every request to
+// the given httptest server URL (e.g. "http://127.0.0.1:54321"). A URL with
+// no explicit port falls back to port 80 instead of panicking.
+func newTestClient(serverURL string) *apiClient {
+	hostPort := strings.SplitN(strings.TrimPrefix(serverURL, "http://"), ":", 2)
+	host, port := hostPort[0], "80"
+	if len(hostPort) == 2 {
+		port = hostPort[1]
+	}
+
+	return &apiClient{
+		token: "test-token",
+		http: &http.Client{
+			Transport: &urlRewriter{targetHost: host, targetPort: port},
+		},
+	}
+}
+
+// b64 base64-encodes s, mirroring the "base64" encoding returned by the
+// GitHub contents API in test fixtures.
+func b64(s string) string {
+	return base64.StdEncoding.EncodeToString([]byte(s))
+}
diff --git a/cmd/sync-content/hugo.go b/cmd/sync-content/hugo.go
new file mode 100644
index 0000000..5d33a31
--- /dev/null
+++ b/cmd/sync-content/hugo.go
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: Apache-2.0
+
+package main
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// ProjectCard is the structure written to data/projects.json for landing page templates.
+type ProjectCard struct {
+	Name        string `json:"name"`
+	Language    string `json:"language"`
+	Type        string `json:"type"`
+	Description string `json:"description"`
+	URL         string `json:"url"`
+	Repo        string `json:"repo"`
+	Stars       int    `json:"stars"`
+}
+
+// deriveProjectType infers a human-readable type label from repo topics and description.
+func deriveProjectType(r Repo) string {
+	// Normalise topics to a lowercase set for O(1) membership checks.
+	topics := make(map[string]bool, len(r.Topics))
+	for _, t := range r.Topics {
+		topics[strings.ToLower(t)] = true
+	}
+	desc := strings.ToLower(r.Description)
+
+	// Cases are checked in priority order; first match wins.
+	switch {
+	case topics["cli"] || strings.Contains(desc, "command-line") || strings.Contains(desc, " cli"):
+		return "CLI Tool"
+	// "automat" already matches "automation", "automated", "automates", etc.,
+	// so the former extra Contains(desc, "automation") check was redundant.
+	case topics["automation"] || strings.Contains(desc, "automat"):
+		return "Automation"
+	case topics["observability"] || strings.Contains(desc, "observability") || strings.Contains(desc, "collector"):
+		return "Observability"
+	case topics["framework"] || strings.Contains(desc, "framework") || strings.Contains(desc, "bridging"):
+		return "Framework"
+	default:
+		return "Library"
+	}
+}
+
+// buildSectionIndex generates a lightweight Hugo section index (_index.md) for a
+// project. Contains only frontmatter metadata so the Doks sidebar renders the
+// section heading as a collapsible toggle with child pages listed underneath.
+func buildSectionIndex(repo Repo, sha, readmeSHA string) string {
+	lang := languageOrDefault(repo.Language)
+	title := formatRepoTitle(repo.Name)
+
+	// NOTE(review): %q emits Go-style double-quoted escaping; this is
+	// compatible with YAML double-quoted scalars for typical ASCII titles and
+	// descriptions — confirm for values containing non-ASCII characters.
+	// NOTE(review): the leading spaces inside the nested-key literals below
+	// appear collapsed in this rendering; YAML requires consistent 2-space
+	// (params children) / 4-space (seo children) indentation — verify the
+	// literals in the actual file.
+	var b strings.Builder
+	b.WriteString("---\n")
+	fmt.Fprintf(&b, "title: %q\n", title)
+	fmt.Fprintf(&b, "linkTitle: %q\n", repo.Name)
+	fmt.Fprintf(&b, "description: %q\n", repo.Description)
+	fmt.Fprintf(&b, "date: %s\n", repo.PushedAt)
+	fmt.Fprintf(&b, "lastmod: %s\n", repo.PushedAt)
+	b.WriteString("draft: false\n")
+	b.WriteString("toc: false\n")
+	b.WriteString("params:\n")
+	fmt.Fprintf(&b, " language: %q\n", lang)
+	fmt.Fprintf(&b, " stars: %d\n", repo.StargazersCount)
+	fmt.Fprintf(&b, " repo: %q\n", repo.HTMLURL)
+	// source_sha pins the branch commit; readme_sha pins the README blob so
+	// later runs can detect content-level changes (see processRepo).
+	fmt.Fprintf(&b, " source_sha: %q\n", sha)
+	fmt.Fprintf(&b, " readme_sha: %q\n", readmeSHA)
+	b.WriteString(" seo:\n")
+	fmt.Fprintf(&b, " title: %q\n", title+" | ComplyTime")
+	fmt.Fprintf(&b, " description: %q\n", repo.Description)
+	b.WriteString("---\n")
+
+	// Frontmatter only — no body; the README lives in overview.md.
+	return b.String()
+}
+
+// buildOverviewPage generates the README content as a child page (overview.md)
+// so it appears as a navigable sidebar link in the Doks theme.
+func buildOverviewPage(repo Repo, readme string) string {
+	// Edit link points at the upstream README on the repo's default branch.
+	editURL := fmt.Sprintf("https://github.com/%s/edit/%s/README.md", repo.FullName, repo.DefaultBranch)
+
+	var b strings.Builder
+	b.WriteString("---\n")
+	fmt.Fprintf(&b, "title: %q\n", "Overview")
+	fmt.Fprintf(&b, "description: %q\n", repo.Description)
+	fmt.Fprintf(&b, "date: %s\n", repo.PushedAt)
+	fmt.Fprintf(&b, "lastmod: %s\n", repo.PushedAt)
+	b.WriteString("draft: false\n")
+	b.WriteString("toc: true\n")
+	// weight 1 keeps Overview at the top of the section's sidebar listing.
+	fmt.Fprintf(&b, "weight: %d\n", 1)
+	b.WriteString("params:\n")
+	fmt.Fprintf(&b, " editURL: %q\n", editURL)
+	b.WriteString("---\n\n")
+	b.WriteString(readme)
+
+	return b.String()
+}
+
+// knownAcronyms maps lowercase tokens to their canonical uppercase form.
+// Used by smartTitle to preserve intended casing for common technical terms.
+var knownAcronyms = map[string]string{
+	"api":    "API",
+	"apis":   "APIs",
+	"cac":    "CAC",
+	"ci":     "CI",
+	"cd":     "CD",
+	"cli":    "CLI",
+	"cpu":    "CPU",
+	"css":    "CSS",
+	"dns":    "DNS",
+	"faq":    "FAQ",
+	"grpc":   "gRPC",
+	"html":   "HTML",
+	"http":   "HTTP",
+	"https":  "HTTPS",
+	"id":     "ID",
+	"io":     "I/O",
+	"ip":     "IP",
+	"json":   "JSON",
+	"jwt":    "JWT",
+	"k8s":    "K8s",
+	"oauth":  "OAuth",
+	"openid": "OpenID",
+	"oscal":  "OSCAL",
+	"rbac":   "RBAC",
+	"rest":   "REST",
+	"sdk":    "SDK",
+	"sql":    "SQL",
+	"ssh":    "SSH",
+	"sso":    "SSO",
+	"tcp":    "TCP",
+	"tls":    "TLS",
+	"toml":   "TOML",
+	"ui":     "UI",
+	"uri":    "URI",
+	"url":    "URL",
+	"uuid":   "UUID",
+	"vm":     "VM",
+	"xml":    "XML",
+	"yaml":   "YAML",
+}
+
+// smartTitle capitalises the first letter of each word, but preserves
+// canonical casing for known acronyms (e.g. "api" → "API", "cac" → "CAC").
+//
+// Unlike the previous version, the input slice is never modified: results are
+// written to a fresh slice, so callers (and tests) no longer need defensive
+// copies before calling.
+func smartTitle(words []string) string {
+	out := make([]string, len(words))
+	for i, w := range words {
+		if canonical, ok := knownAcronyms[strings.ToLower(w)]; ok {
+			out[i] = canonical
+			continue
+		}
+		if w == "" {
+			out[i] = w
+			continue
+		}
+		// NOTE(review): w[:1] slices a single byte, which is fine for the
+		// ASCII repo/file names this processes but would split a multi-byte
+		// first rune — confirm inputs stay ASCII or switch to utf8 decoding.
+		out[i] = strings.ToUpper(w[:1]) + strings.ToLower(w[1:])
+	}
+	return strings.Join(out, " ")
+}
+
+// formatRepoTitle converts a GitHub repo name (typically lowercase/kebab-case)
+// into a human-readable title for Hugo frontmatter.
+// E.g. "complyctl" → "Complyctl", "oscal-sdk" → "OSCAL SDK".
+func formatRepoTitle(repoName string) string {
+	// Split on kebab/snake separators, then title-case each token.
+	words := strings.FieldsFunc(repoName, func(r rune) bool {
+		return r == '-' || r == '_'
+	})
+	return smartTitle(words)
+}
+
+// titleFromFilename converts a Markdown filename stem to a human-readable title.
+// E.g. "quick-start" → "Quick Start", "sync_cac_content" → "Sync CAC Content".
+func titleFromFilename(name string) string {
+	// Strip the extension, turn separators into spaces, then title-case.
+	name = strings.TrimSuffix(name, filepath.Ext(name))
+	name = strings.NewReplacer("-", " ", "_", " ").Replace(name)
+	words := strings.Fields(name)
+	return smartTitle(words)
+}
+
+// buildDocPage generates a Hugo doc page with auto-generated frontmatter
+// derived from the file path. The title comes from the filename, the
+// description combines the repo description with the title, and a provenance
+// comment is inserted after the frontmatter closing delimiter.
+func buildDocPage(filePath, repoFullName, repoDescription, pushedAt, branch, sha, content string) string {
+	title := titleFromFilename(filepath.Base(filePath))
+
+	// Abbreviate the commit SHA for the provenance comment.
+	shortSHA := sha
+	if len(shortSHA) > 12 {
+		shortSHA = shortSHA[:12]
+	}
+
+	editURL := fmt.Sprintf("https://github.com/%s/edit/%s/%s", repoFullName, branch, filePath)
+
+	var b strings.Builder
+	b.WriteString("---\n")
+	fmt.Fprintf(&b, "title: %q\n", title)
+	fmt.Fprintf(&b, "description: %q\n", repoDescription+" — "+title)
+	fmt.Fprintf(&b, "date: %s\n", pushedAt)
+	fmt.Fprintf(&b, "lastmod: %s\n", pushedAt)
+	b.WriteString("draft: false\n")
+	fmt.Fprintf(&b, "weight: %d\n", 10)
+	b.WriteString("params:\n")
+	fmt.Fprintf(&b, " editURL: %q\n", editURL)
+	b.WriteString("---\n")
+	// BUG FIX: the previous format string ("\n\n") contained no verbs yet was
+	// passed four arguments, so fmt.Fprintf appended "%!(EXTRA string=…)"
+	// noise to every generated page instead of the promised provenance
+	// comment. Emit an HTML comment (invisible after rendering) carrying the
+	// source repo, path, branch, and short commit SHA.
+	fmt.Fprintf(&b, "<!-- synced from %s/%s (branch %s, commit %s) -->\n\n", repoFullName, filePath, branch, shortSHA)
+	b.WriteString(content)
+
+	return b.String()
+}
+
+// buildProjectCard constructs a ProjectCard from repo metadata.
+func buildProjectCard(repo Repo) ProjectCard { + return ProjectCard{ + Name: repo.Name, + Language: languageOrDefault(repo.Language), + Type: deriveProjectType(repo), + Description: repo.Description, + URL: fmt.Sprintf("/docs/projects/%s/", repo.Name), + Repo: repo.HTMLURL, + Stars: repo.StargazersCount, + } +} diff --git a/cmd/sync-content/hugo_test.go b/cmd/sync-content/hugo_test.go new file mode 100644 index 0000000..f21f962 --- /dev/null +++ b/cmd/sync-content/hugo_test.go @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "strings" + "testing" +) + +func TestFormatRepoTitle(t *testing.T) { + cases := []struct { + input string + want string + }{ + {"complyctl", "Complyctl"}, + {"oscal-sdk", "OSCAL SDK"}, + {"cac-content-sync", "CAC Content Sync"}, + {"my-cli-tool", "My CLI Tool"}, + {"rest-api-server", "REST API Server"}, + {"simple", "Simple"}, + {"json-yaml-converter", "JSON YAML Converter"}, + {"k8s-operator", "K8s Operator"}, + {"oauth-grpc-bridge", "OAuth gRPC Bridge"}, + } + + for _, tc := range cases { + t.Run(tc.input, func(t *testing.T) { + got := formatRepoTitle(tc.input) + if got != tc.want { + t.Errorf("formatRepoTitle(%q) = %q, want %q", tc.input, got, tc.want) + } + }) + } +} + +func TestTitleFromFilename(t *testing.T) { + cases := []struct { + input string + want string + }{ + {"quick-start.md", "Quick Start"}, + {"sync_cac_content.md", "Sync CAC Content"}, + {"api-reference.md", "API Reference"}, + {"installation.md", "Installation"}, + {"cli-usage.md", "CLI Usage"}, + {"rest-api.md", "REST API"}, + {"getting-started", "Getting Started"}, + {"CONTRIBUTING.md", "Contributing"}, + {"PLUGIN_GUIDE.md", "Plugin Guide"}, + {"RELEASE-PROCESS.md", "Release Process"}, + } + + for _, tc := range cases { + t.Run(tc.input, func(t *testing.T) { + got := titleFromFilename(tc.input) + if got != tc.want { + t.Errorf("titleFromFilename(%q) = %q, want %q", tc.input, got, tc.want) + } + }) + } +} + +func TestSmartTitle(t 
*testing.T) { + cases := []struct { + name string + input []string + want string + }{ + {"plain words", []string{"hello", "world"}, "Hello World"}, + {"acronym api", []string{"my", "api"}, "My API"}, + {"mixed case via acronym map", []string{"OAuth", "setup"}, "OAuth Setup"}, + {"already uppercase acronym", []string{"CLI"}, "CLI"}, + {"h6 cap", []string{"some", "uuid", "generator"}, "Some UUID Generator"}, + {"all caps normalised", []string{"CONTRIBUTING"}, "Contributing"}, + {"all caps multi-word", []string{"PLUGIN", "GUIDE"}, "Plugin Guide"}, + {"mixed all-caps and acronym", []string{"OSCAL", "QUICK", "START"}, "OSCAL Quick Start"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + input := make([]string, len(tc.input)) + copy(input, tc.input) + got := smartTitle(input) + if got != tc.want { + t.Errorf("smartTitle(%v) = %q, want %q", tc.input, got, tc.want) + } + }) + } +} + +func TestBuildSectionIndex(t *testing.T) { + repo := Repo{ + Name: "oscal-sdk", + FullName: "complytime/oscal-sdk", + Description: "OSCAL SDK for Go", + Language: "Go", + StargazersCount: 10, + HTMLURL: "https://github.com/complytime/oscal-sdk", + PushedAt: "2025-06-01T00:00:00Z", + } + + result := buildSectionIndex(repo, "sha-branch", "sha-readme") + + if !strings.Contains(result, `title: "OSCAL SDK"`) { + t.Error("section index title should use formatRepoTitle (OSCAL SDK)") + } + if !strings.Contains(result, `linkTitle: "oscal-sdk"`) { + t.Error("section index should have linkTitle with raw repo name for sidebar") + } + if !strings.Contains(result, `seo:`) { + t.Error("section index should have seo params") + } + if !strings.Contains(result, `title: "OSCAL SDK | ComplyTime"`) { + t.Error("SEO title should use formatted repo title") + } + if !strings.Contains(result, "readme_sha:") { + t.Error("section index should contain readme_sha") + } +} + +func TestBuildDocPage(t *testing.T) { + content := "## Getting Started\n\nSome content here." 
+ result := buildDocPage( + "docs/api-reference.md", + "complytime/complyctl", + "A CLI tool", + "2025-06-01T00:00:00Z", + "main", + "abc123def456789", + content, + ) + + if !strings.Contains(result, `title: "API Reference"`) { + t.Error("doc page title should use titleFromFilename with acronym handling") + } + if !strings.Contains(result, `description: "A CLI tool — API Reference"`) { + t.Error("description should combine repo description with title") + } + if !strings.Contains(result, "\n", + src.Repo, file.Src, src.Branch, shortSHA, + ) + out = insertAfterFrontmatter(out, []byte(provenance)) + + destPath := filepath.Join(output, file.Dest) + + if !isUnderDir(output, destPath) { + logger.Error("path traversal blocked", "dest", file.Dest, "resolved", destPath) + result.addError() + continue + } + + if !write { + logger.Info("would write config file (dry-run)", "src", file.Src, "dest", destPath) + result.addSynced() + continue + } + + written, err := writeFileSafe(destPath, out) + if err != nil { + logger.Error("error writing config file", "src", file.Src, "dest", destPath, "error", err) + result.addError() + continue + } + + result.recordFile(file.Dest) + + if written { + logger.Info("wrote config file", "src", file.Src, "dest", destPath) + } else { + logger.Info("config file unchanged", "src", file.Src, "dest", destPath) + } + + result.addSynced() + } +} + +func parseNameList(raw string) map[string]bool { + set := make(map[string]bool) + for _, name := range strings.Split(raw, ",") { + name = strings.TrimSpace(name) + if name != "" { + set[name] = true + } + } + return set +} + +// repoWork holds the inputs and outputs for processing a single repo. +type repoWork struct { + repo Repo + sha string + card ProjectCard + unchanged bool +} + +// processRepo handles a single repository: fetches content, writes pages. +// When skipReadme is true, README fetching and project page generation are +// skipped but the ProjectCard is still produced. 
+// +// lockedSHA, when non-empty, pins content fetches to the approved commit. +// If the upstream branch has moved past the lock, content is still fetched +// at the locked version so only reviewed content reaches production. +// +// Two-tier change detection: +// 1. Branch SHA unchanged → skip all fetches (fast path). +// 2. Branch SHA changed → fetch README, compare blob SHA for accurate +// content-level change reporting. +// +// All shared state mutations go through result.mu. +func processRepo(ctx context.Context, gh *apiClient, org, output string, repo Repo, write bool, skipReadme bool, result *syncResult, oldState map[string]repoState, oldManifest map[string]bool, lockedSHA string) *repoWork { + logger := slog.With("repo", repo.Name) + + sha, err := gh.getBranchSHA(ctx, org, repo.Name, repo.DefaultBranch) + if err != nil { + logger.Warn("could not get branch SHA", "error", err) + sha = "unknown" + result.addWarning() + } + + old, existed := oldState[repo.Name] + + // Fast path: branch hasn't changed since last sync — skip all fetches. + if existed && old.branchSHA == sha { + result.mu.Lock() + result.unchanged = append(result.unchanged, repo.Name) + result.mu.Unlock() + result.addSynced() + + if !write { + logger.Info("unchanged (branch SHA match), skipping", "sha", sha) + return &repoWork{repo: repo, sha: sha, card: buildProjectCard(repo), unchanged: true} + } + + logger.Info("unchanged (branch SHA match), skipping fetches", "sha", sha) + if oldManifest != nil { + carryForwardManifest(result, repo.Name, oldManifest) + } + + return &repoWork{repo: repo, sha: sha, card: buildProjectCard(repo), unchanged: true} + } + + // Dry-run: report what would happen without fetching content. 
+ if !write { + result.mu.Lock() + if !existed { + result.added = append(result.added, repo.Name) + } else { + result.updated = append(result.updated, repo.Name) + } + result.mu.Unlock() + result.addSynced() + logger.Info("would sync (dry-run)", "sha", sha) + return &repoWork{repo: repo, sha: sha, card: buildProjectCard(repo)} + } + + // Slow path: branch SHA changed — fetch content and compare file-level SHAs. + // When a lock is active, fetch at the locked commit rather than HEAD. + fetchRef := "" + if lockedSHA != "" && lockedSHA != sha { + fetchRef = lockedSHA + } + + contentChanged := !existed + var readmeSHA string + + if !skipReadme { + readme, rSHA, err := gh.getREADME(ctx, org, repo.Name, fetchRef) + readmeSHA = rSHA + if err != nil { + logger.Warn("no README found", "error", err) + result.addWarning() + } + + if existed && old.readmeSHA != "" && old.readmeSHA == readmeSHA { + logger.Info("README unchanged despite branch update", "branch_sha", sha, "readme_sha", readmeSHA) + } else { + contentChanged = true + } + + if readme != "" { + readme = stripLeadingH1(readme) + readme = shiftHeadings(readme) + readme = titleCaseHeadings(readme) + readme = stripBadges(readme) + readme = rewriteRelativeLinks(readme, org, repo.Name, repo.DefaultBranch) + } else { + readme = fmt.Sprintf( + "*No README available.* Visit the [repository on GitHub](%s) for more information.\n", + repo.HTMLURL, + ) + } + + indexPage := buildSectionIndex(repo, sha, readmeSHA) + indexRel := filepath.Join("content", "docs", "projects", repo.Name, "_index.md") + indexPath := filepath.Join(output, indexRel) + if !isUnderDir(output, indexPath) { + logger.Error("path traversal blocked", "path", indexRel) + result.addError() + return nil + } + written, err := writeFileSafe(indexPath, []byte(indexPage)) + if err != nil { + logger.Error("error writing section index", "path", indexPath, "error", err) + result.addError() + return nil + } + result.recordFile(indexRel) + if written { + logger.Info("wrote 
section index", "path", indexPath) + } else { + logger.Info("section index unchanged", "path", indexPath) + } + + overviewPage := buildOverviewPage(repo, readme) + overviewRel := filepath.Join("content", "docs", "projects", repo.Name, "overview.md") + overviewPath := filepath.Join(output, overviewRel) + if !isUnderDir(output, overviewPath) { + logger.Error("path traversal blocked", "path", overviewRel) + result.addError() + return nil + } + written, err = writeFileSafe(overviewPath, []byte(overviewPage)) + if err != nil { + logger.Error("error writing overview page", "path", overviewPath, "error", err) + result.addError() + return nil + } + result.recordFile(overviewRel) + if written { + logger.Info("wrote overview page", "path", overviewPath) + } else { + logger.Info("overview page unchanged", "path", overviewPath) + } + } + + result.mu.Lock() + switch { + case !existed: + result.added = append(result.added, repo.Name) + case contentChanged: + result.updated = append(result.updated, repo.Name) + default: + result.unchanged = append(result.unchanged, repo.Name) + } + result.mu.Unlock() + result.addSynced() + + return &repoWork{repo: repo, sha: sha, card: buildProjectCard(repo)} +} + +// syncRepoDocPages auto-syncs Markdown files found under each scan_path in the +// discovery config. Files already tracked by explicit config sources or listed +// in ignoreFiles are skipped. Intermediate directories get auto-generated +// _index.md section pages. When ref is non-empty, content is fetched at that +// specific commit SHA. 
+func syncRepoDocPages(ctx context.Context, gh *apiClient, org string, repo Repo, output string, write bool, discovery Discovery, ignoreFiles map[string]bool, configTracked map[string]bool, result *syncResult, ref string) { + logger := slog.With("repo", repo.Name, "phase", "doc-pages") + + for _, scanPath := range discovery.ScanPaths { + files, err := gh.listDirMD(ctx, org, repo.Name, scanPath, ref) + if err != nil { + logger.Debug("scan path not found", "path", scanPath, "error", err) + continue + } + + neededDirs := make(map[string]bool) + + for _, filePath := range files { + baseName := filepath.Base(filePath) + if ignoreFiles[baseName] { + continue + } + if configTracked[filePath] { + continue + } + + relPath := strings.TrimPrefix(filePath, scanPath+"/") + destRel := filepath.Join("content", "docs", "projects", repo.Name, relPath) + destPath := filepath.Join(output, destRel) + + if !isUnderDir(output, destPath) { + logger.Error("path traversal blocked", "src", filePath, "dest", destRel) + result.addError() + continue + } + + dir := filepath.Dir(relPath) + for dir != "." 
&& dir != "" { + neededDirs[dir] = true + dir = filepath.Dir(dir) + } + + if !write { + logger.Info("would write doc page (dry-run)", "src", filePath, "dest", destRel) + result.addSynced() + continue + } + + content, sha, err := gh.getFileContent(ctx, org, repo.Name, filePath, ref) + if err != nil { + logger.Warn("could not fetch doc file", "path", filePath, "error", err) + result.addWarning() + continue + } + + content = stripBadges(content) + content = stripLeadingH1(content) + content = shiftHeadings(content) + content = titleCaseHeadings(content) + fileDir := filepath.Dir(filePath) + content = rewriteRelativeLinks(content, org, repo.Name, repo.DefaultBranch, fileDir) + + page := buildDocPage(filePath, repo.FullName, repo.Description, repo.PushedAt, repo.DefaultBranch, sha, content) + + written, err := writeFileSafe(destPath, []byte(page)) + if err != nil { + logger.Error("error writing doc page", "path", destPath, "error", err) + result.addError() + continue + } + + result.recordFile(destRel) + if written { + logger.Info("wrote doc page", "src", filePath, "dest", destPath) + } else { + logger.Info("doc page unchanged", "src", filePath, "dest", destPath) + } + + result.addSynced() + } + + for dir := range neededDirs { + indexRel := filepath.Join("content", "docs", "projects", repo.Name, dir, "_index.md") + indexPath := filepath.Join(output, indexRel) + + if !isUnderDir(output, indexPath) { + logger.Error("path traversal blocked for section index", "path", indexRel) + result.addError() + continue + } + + if _, err := os.Stat(indexPath); err == nil { + result.recordFile(indexRel) + continue + } + + if !write { + continue + } + + title := titleFromFilename(filepath.Base(dir)) + var b strings.Builder + b.WriteString("---\n") + fmt.Fprintf(&b, "title: %q\n", title) + fmt.Fprintf(&b, "description: %q\n", repo.Description+" — "+title) + fmt.Fprintf(&b, "date: %s\n", repo.PushedAt) + fmt.Fprintf(&b, "lastmod: %s\n", repo.PushedAt) + b.WriteString("draft: false\n") + 
b.WriteString("---\n") + + written, err := writeFileSafe(indexPath, []byte(b.String())) + if err != nil { + logger.Error("error writing section index", "path", indexPath, "error", err) + continue + } + + result.recordFile(indexRel) + if written { + logger.Info("wrote section index", "path", indexPath) + } + } + } +} + +// writeGitHubOutputs writes structured outputs for GitHub Actions integration. +func writeGitHubOutputs(result *syncResult) { + if ghOutput := os.Getenv("GITHUB_OUTPUT"); ghOutput != "" { + ghOutput = filepath.Clean(ghOutput) + f, err := os.OpenFile(ghOutput, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) //nolint:gosec // G304: path from trusted Actions env + if err == nil { + defer func() { _ = f.Close() }() + hasChanges := "false" + if result.hasChanges() { + hasChanges = "true" + } + _, _ = fmt.Fprintf(f, "has_changes=%s\n", hasChanges) + _, _ = fmt.Fprintf(f, "changed_count=%d\n", len(result.added)+len(result.updated)) + _, _ = fmt.Fprintf(f, "error_count=%d\n", result.errors) + } + } + + if summaryPath := os.Getenv("GITHUB_STEP_SUMMARY"); summaryPath != "" { + summaryPath = filepath.Clean(summaryPath) + f, err := os.OpenFile(summaryPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) //nolint:gosec // G304: path from trusted Actions env + if err == nil { + defer func() { _ = f.Close() }() + _, _ = fmt.Fprint(f, result.toMarkdown()) + } + } +} diff --git a/cmd/sync-content/sync_test.go b/cmd/sync-content/sync_test.go new file mode 100644 index 0000000..2a784c2 --- /dev/null +++ b/cmd/sync-content/sync_test.go @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "sync" + "testing" +) + +func TestProcessRepo(t *testing.T) { + readmeContent := "# test-repo\n\nThis is a test README." 
+ branchSHA := "abc123def456" + + mux := http.NewServeMux() + + mux.HandleFunc("/repos/testorg/test-repo/readme", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(readmeContent), + Encoding: "base64", + SHA: "sha-readme", + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/branches/main", func(w http.ResponseWriter, r *http.Request) { + resp := BranchResponse{} + resp.Commit.SHA = branchSHA + _ = json.NewEncoder(w).Encode(resp) + }) + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + repo := Repo{ + Name: "test-repo", + FullName: "testorg/test-repo", + Description: "A test repository", + Language: "Go", + HTMLURL: "https://github.com/testorg/test-repo", + DefaultBranch: "main", + } + + result := &syncResult{} + oldState := map[string]repoState{} + + work := processRepo(ctx, gh, "testorg", output, repo, true, false, result, oldState, nil, "") + + if work == nil { + t.Fatal("processRepo returned nil") + } + if work.card.Name != "test-repo" { + t.Errorf("card.Name = %q, want %q", work.card.Name, "test-repo") + } + if work.card.Language != "Go" { + t.Errorf("card.Language = %q, want %q", work.card.Language, "Go") + } + if work.card.Description != "A test repository" { + t.Errorf("card.Description = %q, want %q", work.card.Description, "A test repository") + } + + indexPath := filepath.Join(output, "content", "docs", "projects", "test-repo", "_index.md") + data, err := os.ReadFile(indexPath) + if err != nil { + t.Fatalf("section index not written: %v", err) + } + index := string(data) + if !strings.Contains(index, `title: "Test Repo"`) { + t.Error("section index title should use formatRepoTitle") + } + if !strings.Contains(index, `linkTitle: "test-repo"`) { + t.Error("section index should have linkTitle with raw repo name") + } + if !strings.Contains(index, "readme_sha:") { + t.Error("section index should 
contain readme_sha in frontmatter") + } + if !strings.Contains(index, "sha-readme") { + t.Error("section index should contain the README blob SHA value") + } + if strings.Contains(index, "This is a test README.") { + t.Error("section index should be frontmatter-only, no README body") + } + + overviewPath := filepath.Join(output, "content", "docs", "projects", "test-repo", "overview.md") + overviewData, err := os.ReadFile(overviewPath) + if err != nil { + t.Fatalf("overview page not written: %v", err) + } + overview := string(overviewData) + if !strings.Contains(overview, "This is a test README.") { + t.Error("overview page should contain README body") + } + if strings.Contains(overview, "# test-repo") || strings.Contains(overview, "## Test-repo") { + t.Error("leading H1 should be stripped — title is already in frontmatter") + } + if !strings.Contains(overview, `title: "Overview"`) { + t.Error("overview page should have title 'Overview'") + } + if work.unchanged { + t.Error("unchanged should be false for new repos") + } +} + +func TestProcessRepo_BranchUnchanged(t *testing.T) { + branchSHA := "abc123def456" + readmeCalls := 0 + + mux := http.NewServeMux() + mux.HandleFunc("/repos/testorg/test-repo/branches/main", func(w http.ResponseWriter, r *http.Request) { + resp := BranchResponse{} + resp.Commit.SHA = branchSHA + _ = json.NewEncoder(w).Encode(resp) + }) + mux.HandleFunc("/repos/testorg/test-repo/readme", func(w http.ResponseWriter, r *http.Request) { + readmeCalls++ + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64("# test-repo\n\nContent"), + Encoding: "base64", + SHA: "sha-readme", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + repo := Repo{ + Name: "test-repo", + FullName: "testorg/test-repo", + Description: "A test repository", + Language: "Go", + HTMLURL: "https://github.com/testorg/test-repo", + DefaultBranch: "main", + } + + 
oldState := map[string]repoState{ + "test-repo": {branchSHA: branchSHA, readmeSHA: "sha-readme"}, + } + oldManifest := map[string]bool{ + "content/docs/projects/test-repo/_index.md": true, + } + + result := &syncResult{} + work := processRepo(ctx, gh, "testorg", output, repo, true, false, result, oldState, oldManifest, "") + + if work == nil { + t.Fatal("processRepo returned nil for unchanged repo in write mode") + } + if work.card.Name != "test-repo" { + t.Errorf("card.Name = %q, want %q", work.card.Name, "test-repo") + } + if readmeCalls != 0 { + t.Errorf("README was fetched %d times, want 0 (fast path should skip)", readmeCalls) + } + if !work.unchanged { + t.Error("unchanged should be true when branch SHA matches") + } + if len(result.unchanged) != 1 || result.unchanged[0] != "test-repo" { + t.Errorf("unchanged = %v, want [test-repo]", result.unchanged) + } + if len(result.writtenFiles) != 1 { + t.Errorf("writtenFiles = %d, want 1 (carried forward from manifest)", len(result.writtenFiles)) + } +} + +func TestProcessRepo_BranchChangedReadmeUnchanged(t *testing.T) { + readmeContent := "# test-repo\n\nThis is a test README." 
+ readmeSHA := "sha-readme-stable" + + mux := http.NewServeMux() + mux.HandleFunc("/repos/testorg/test-repo/readme", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(readmeContent), + Encoding: "base64", + SHA: readmeSHA, + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/branches/main", func(w http.ResponseWriter, r *http.Request) { + resp := BranchResponse{} + resp.Commit.SHA = "new-branch-sha" + _ = json.NewEncoder(w).Encode(resp) + }) + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + repo := Repo{ + Name: "test-repo", + FullName: "testorg/test-repo", + Description: "A test repository", + Language: "Go", + HTMLURL: "https://github.com/testorg/test-repo", + DefaultBranch: "main", + } + + oldState := map[string]repoState{ + "test-repo": {branchSHA: "old-branch-sha", readmeSHA: readmeSHA}, + } + + result := &syncResult{} + work := processRepo(ctx, gh, "testorg", output, repo, true, false, result, oldState, nil, "") + + if work == nil { + t.Fatal("processRepo returned nil") + } + if len(result.unchanged) != 1 || result.unchanged[0] != "test-repo" { + t.Errorf("repo should be classified as unchanged when README SHA matches, got unchanged=%v updated=%v", result.unchanged, result.updated) + } +} + +func TestSyncConfigSource(t *testing.T) { + fileContent := "[![badge](https://img.svg)](https://ci)\n\n# complyctl\n\nSome [link](docs/guide.md) here." 
+ + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/complyctl/contents/README.md", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(fileContent), + Encoding: "base64", + SHA: "sha-file", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + src := Source{ + Repo: "org/complyctl", + Branch: "main", + Files: []FileSpec{ + { + Src: "README.md", + Dest: "content/docs/projects/complyctl/_index.md", + Transform: Transform{ + InjectFrontmatter: map[string]any{ + "title": "complyctl", + "description": "CLI tool", + "weight": 10, + }, + RewriteLinks: true, + StripBadges: true, + }, + }, + }, + } + + t.Run("write mode applies transforms", func(t *testing.T) { + result := &syncResult{} + syncConfigSource(ctx, gh, src, Defaults{Branch: "main"}, output, true, result, "") + + if result.errors > 0 { + t.Fatalf("syncConfigSource had %d errors", result.errors) + } + if result.synced != 1 { + t.Errorf("synced = %d, want 1", result.synced) + } + + destPath := filepath.Join(output, "content", "docs", "projects", "complyctl", "_index.md") + data, err := os.ReadFile(destPath) + if err != nil { + t.Fatalf("config file not written: %v", err) + } + content := string(data) + + if !strings.Contains(content, "title: complyctl") { + t.Error("injected frontmatter should contain title") + } + if strings.Contains(content, "[![badge") { + t.Error("badges should be stripped") + } + if strings.Contains(content, "](docs/guide.md)") { + t.Error("relative links should be rewritten") + } + if !strings.Contains(content, "https://github.com/org/complyctl/blob/main/docs/guide.md") { + t.Error("relative link should become absolute GitHub URL") + } + if strings.Contains(content, "# complyctl") || strings.Contains(content, "## Complyctl") { + t.Error("leading H1 should be stripped — title is already in frontmatter") + } + }) + + 
t.Run("dry-run writes nothing", func(t *testing.T) { + dryOutput := t.TempDir() + result := &syncResult{} + syncConfigSource(ctx, gh, src, Defaults{Branch: "main"}, dryOutput, false, result, "") + + if result.synced != 1 { + t.Errorf("dry-run synced = %d, want 1", result.synced) + } + + destPath := filepath.Join(dryOutput, "content", "docs", "projects", "complyctl", "_index.md") + if _, err := os.Stat(destPath); !os.IsNotExist(err) { + t.Error("dry-run should not create files") + } + }) +} + +func TestConcurrentSyncResult(t *testing.T) { + result := &syncResult{} + var wg sync.WaitGroup + + for range 100 { + wg.Add(1) + go func() { + defer wg.Done() + result.addSynced() + }() + } + + for range 50 { + wg.Add(1) + go func() { + defer wg.Done() + result.addError() + }() + } + + for range 25 { + wg.Add(1) + go func() { + defer wg.Done() + result.addWarning() + }() + } + + wg.Wait() + + if result.synced != 100 { + t.Errorf("synced = %d, want 100", result.synced) + } + if result.errors != 50 { + t.Errorf("errors = %d, want 50", result.errors) + } + if result.warnings != 25 { + t.Errorf("warnings = %d, want 25", result.warnings) + } +} + +func TestRecordFile(t *testing.T) { + result := &syncResult{} + var wg sync.WaitGroup + for i := range 50 { + wg.Add(1) + go func(n int) { + defer wg.Done() + result.recordFile(fmt.Sprintf("file-%d.md", n)) + }(i) + } + wg.Wait() + if len(result.writtenFiles) != 50 { + t.Errorf("writtenFiles = %d, want 50", len(result.writtenFiles)) + } +} + +func TestSyncConfigSourceProvenance(t *testing.T) { + fileContent := "# complyctl\n\nSome content." 
+ fileSHA := "abc123def456789" + + mux := http.NewServeMux() + mux.HandleFunc("/repos/org/complyctl/contents/README.md", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64(fileContent), + Encoding: "base64", + SHA: fileSHA, + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + src := Source{ + Repo: "org/complyctl", + Branch: "main", + Files: []FileSpec{ + { + Src: "README.md", + Dest: "content/docs/projects/complyctl/_index.md", + Transform: Transform{ + InjectFrontmatter: map[string]any{"title": "complyctl"}, + }, + }, + }, + } + + result := &syncResult{} + syncConfigSource(ctx, gh, src, Defaults{Branch: "main"}, output, true, result, "") + + destPath := filepath.Join(output, "content", "docs", "projects", "complyctl", "_index.md") + data, err := os.ReadFile(destPath) + if err != nil { + t.Fatalf("file not written: %v", err) + } + content := string(data) + + if !strings.Contains(content, "") { + t.Errorf("provenance comment missing or incorrect, got:\n%s", content) + } +} + +func TestSyncRepoDocPages(t *testing.T) { + mux := http.NewServeMux() + + mux.HandleFunc("/repos/testorg/test-repo/contents/docs", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode([]DirEntry{ + {Name: "installation.md", Path: "docs/installation.md", Type: "file"}, + {Name: "usage.md", Path: "docs/usage.md", Type: "file"}, + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/contents/docs/installation.md", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64("# Installation\n\nRun `go install`."), + Encoding: "base64", + SHA: "sha-install", + }) + }) + mux.HandleFunc("/repos/testorg/test-repo/contents/docs/usage.md", func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(FileResponse{ + Content: b64("# Usage\n\nRun the 
CLI tool."), + Encoding: "base64", + SHA: "sha-usage", + }) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + gh := newTestClient(server.URL) + output := t.TempDir() + ctx := context.Background() + + repo := Repo{ + Name: "test-repo", + FullName: "testorg/test-repo", + Description: "A test repository", + Language: "Go", + HTMLURL: "https://github.com/testorg/test-repo", + DefaultBranch: "main", + PushedAt: "2025-01-15T00:00:00Z", + } + + discovery := Discovery{ScanPaths: []string{"docs"}} + result := &syncResult{} + syncRepoDocPages(ctx, gh, "testorg", repo, output, true, discovery, nil, nil, result, "") + + if result.errors != 0 { + t.Fatalf("errors = %d, want 0", result.errors) + } + if result.synced != 2 { + t.Errorf("synced = %d, want 2", result.synced) + } + + cases := []struct { + relPath string + title string + provSrc string + }{ + { + relPath: "content/docs/projects/test-repo/installation.md", + title: "Installation", + provSrc: "testorg/test-repo/docs/installation.md@main", + }, + { + relPath: "content/docs/projects/test-repo/usage.md", + title: "Usage", + provSrc: "testorg/test-repo/docs/usage.md@main", + }, + } + + for _, tc := range cases { + fullPath := filepath.Join(output, tc.relPath) + data, err := os.ReadFile(fullPath) + if err != nil { + t.Fatalf("file not written: %s: %v", tc.relPath, err) + } + content := string(data) + + if !strings.Contains(content, fmt.Sprintf("title: %q", tc.title)) { + t.Errorf("%s: missing title %q in frontmatter:\n%s", tc.relPath, tc.title, content) + } + if !strings.Contains(content, "draft: false") { + t.Errorf("%s: missing draft: false", tc.relPath) + } + if !strings.Contains(content, "weight: 10") { + t.Errorf("%s: missing weight: 10", tc.relPath) + } + if !strings.Contains(content, "date: 2025-01-15T00:00:00Z") { + t.Errorf("%s: missing or wrong date", tc.relPath) + } + if !strings.Contains(content, "\n") + result := string(insertAfterFrontmatter(content, insert)) + + if !strings.Contains(result, 
"---\n") { + t.Errorf("provenance should appear after closing ---, got:\n%s", result) + } + if !strings.Contains(result, "Body text") { + t.Error("body should be preserved") + } + }) + + t.Run("without frontmatter", func(t *testing.T) { + content := []byte("# Hello\n\nBody text") + insert := []byte("\n") + result := string(insertAfterFrontmatter(content, insert)) + + if !strings.HasPrefix(result, "") { + t.Errorf("provenance should be prepended when no frontmatter, got:\n%s", result) + } + if !strings.Contains(result, "# Hello") { + t.Error("content should be preserved") + } + }) +} diff --git a/config/_default/params.toml b/config/_default/params.toml index fcf0878..8073a31 100644 --- a/config/_default/params.toml +++ b/config/_default/params.toml @@ -44,7 +44,7 @@ mainSections = ["docs"] sectionNav = ["docs"] toTopButton = true breadcrumbTrail = true - headlineHash = true + headlineHash = false scrollSpy = true # Multilingual diff --git a/content/docs/projects/_index.md b/content/docs/projects/_index.md index ac8ede7..734877b 100644 --- a/content/docs/projects/_index.md +++ b/content/docs/projects/_index.md @@ -8,142 +8,19 @@ draft: false images: [] weight: 200 toc: true +cascade: + - sidebar: + collapsed: true + _target: + kind: section + path: "{/docs/projects/*}" --- ## Core Projects ComplyTime consists of several interconnected projects, each serving a specific purpose in the compliance automation workflow. -### Command Line Tools - -
- - -
- -### Frameworks & Libraries - -
- - - -
- -### Observability & Collection - -
- -
- -### AI & Automation - -
- -
- -### Demos & Examples - -
- - -
+{{< project-cards >}} ## Getting Involved diff --git a/content/docs/projects/complyctl/_index.md b/content/docs/projects/complyctl/_index.md deleted file mode 100644 index 276956f..0000000 --- a/content/docs/projects/complyctl/_index.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -description: A command-line tool for streamlining end-to-end compliance workflows. -title: complyctl -weight: 10 ---- - - -# complyctl - -ComplyCTL leverages [OSCAL](https://github.com/usnistgov/OSCAL/) to perform compliance assessment activities, using plugins for each stage of the lifecycle. - -## Documentation - -:paperclip: [Installation](https://github.com/complytime/complyctl/blob/main/docs/INSTALLATION.md)\ -:paperclip: [Quick Start](https://github.com/complytime/complyctl/blob/main/docs/QUICK_START.md)\ -:paperclip: [Sample Component Definition](https://github.com/complytime/complyctl/blob/main/docs/samples/sample-component-definition.json) - -### Basic Usage - -Determine the baseline you want to run a scan for and create an OSCAL [Assessment Plan](https://pages.nist.gov/OSCAL/learn/concepts/layer/assessment/assessment-plan/). The Assessment -Plan will act as configuration to guide the complyctl generation and scanning operations. - -### `list` command - -```bash -complyctl list -... -# Table appears with options. Look at the Framework ID column. -``` - -### `info` command - -```bash -complyctl info -# Display information about a framework's controls and rules. - -complyctl info --control -# Display details about a specific control. - -complyctl info --rule -# Display details about a specific rule. - -complyctl info --parameter -# Display details about a specific parameter. -``` - -### `plan` command - -```bash -complyctl plan -... -# The file will be written out to assessment-plan.json in the specified workspace. -# Defaults to current working directory. 
- -cat complytime/assessment-plan.json -# The default assessment-plan.json will be available in the complytime workspace (complytime/assessment-plan.json). - -complyctl plan --dry-run -# See the default contents of the assessment-plan.json. -``` - -Use a scope config file to customize the assessment plan: - -```bash -complyctl plan --dry-run --out config.yml -# Customize the assessment-plan.json with the 'out' flag. Updates can be made to the config.yml. -``` - -Open the `config.yml` file in a text editor and modify the YAML as desired. The example below shows various options for including and excluding rules. - -The `selectParameters` YAML key sets parameters for the `controlId`. If you try to use a value that isn't supported, an error will occur, and the valid alternative values will be displayed. To fix this, update the `value` in the `config.yml` file, and then run the command with the `--scope-config ` flag. This will generate a new `assessment-plan.json` file with the updated values. 
- -```yaml -frameworkId: example-framework -includeControls: -- controlId: control-01 - controlTitle: Title of Control 01 - includeRules: - - "*" # all rules included by default - selectParameters: - - name: param-1-id - value: param-1-value - - name: param-2-id - value: param-2-value -- controlId: control-02 - controlTitle: Title of Control 02 - includeRules: - - "rule-02" # only rule-02 will be included for this control - waiveRules: - - "rule-01" # rule-01 will be waived for this control -- controlId: control-03 - controlTitle: Title of Control 03 - includeRules: - - "*" - selectParameters: - - name: param-1-id - value: param-1-value - - name: param-5-id - value: param-5-value # update the value with available alternatives - excludeRules: - - "rule-03" # exclude rule-03 specific rule from control-03 -globalExcludeRules: - - "rule-99" # will be excluded for all controls, this takes priority over any includeRules, waiveRules, and globalWaiveRules clauses above -globalWaiveRules: - - "rule-50" # will be waived for all controls, this takes priority over any includeRules clauses above -``` - -The edited `config.yml` can then be used with the `plan` command to customize the assessment plan. - -```bash -complyctl plan --scope-config config.yml -# The config.yml will be loaded by passing '--scope-config' to customize the assessment-plan.json. -``` - -### `generate` command - -```bash -complyctl generate -# Run the `generate` command to generate the plugin specific policy artifacts in the workspace. -``` - -### `scan` command - -```bash -complyctl scan -# Run the `scan` command to execute the PVP plugins and create results artifacts. The results will be written to assessment-results.json in the specified workspace. - -complyctl scan --with-md -# Results can also be created in Markdown format by passing the `--with-md` flag. 
-``` - -## Plugin Interaction - -plugin-interaction - -## Contributing - -:paperclip: Read the [contributing guidelines](https://github.com/complytime/complyctl/blob/main/docs/CONTRIBUTING.md)\ -:paperclip: Read the [style guide](https://github.com/complytime/complyctl/blob/main/docs/STYLE_GUIDE.md)\ -:paperclip: Read and agree to the [Code of Conduct](https://github.com/complytime/complyctl/blob/main/docs/CODE_OF_CONDUCT.md) - -*Interested in writing a plugin?* See the [plugin guide](https://github.com/complytime/complyctl/blob/main/docs/PLUGIN_GUIDE.md). diff --git a/content/docs/projects/complyctl/installation.md b/content/docs/projects/complyctl/installation.md deleted file mode 100644 index 6e0f532..0000000 --- a/content/docs/projects/complyctl/installation.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -description: Install complyctl on your system. -title: Installation -weight: 30 ---- - - -# Installation - -## Binary - -- The latest binary release can be downloaded from . -- The release signature can be verified with: - ``` - cosign verify-blob --certificate complyctl_*_checksums.txt.pem --signature complyctl_*_checksums.txt.sig complytime_*_checksums.txt --certificate-oidc-issuer=https://token.actions.githubusercontent.com --certificate-identity=https://github.com/complytime/complyctl/.github/workflows/release.yml@refs/heads/main - ``` - - -## From Source - -### Prerequisites - -- **Go** version 1.20 or higher -- **Make** (optional, for using the `Makefile` if included) -- **pandoc** (optional, for generating man pages using the `make man`) - -### Clone the repository - -```bash -git clone https://github.com/complytime/complyctl.git -cd complyctl -``` - -### Build Instructions -To compile complyctl and openscap-plugin: - -```bash -make build -``` - -The binaries can be found in the `bin/` directory in the local repo. Add it to your PATH and you are all set! 
diff --git a/content/docs/projects/complyctl/plugin-guide.md b/content/docs/projects/complyctl/plugin-guide.md deleted file mode 100644 index e8e50de..0000000 --- a/content/docs/projects/complyctl/plugin-guide.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -description: Discover, install, and manage complyctl plugins. -title: Plugin Guide -weight: 40 ---- - - -# Plugin Authoring - -Complyctl can be extended to support desired policy engines (PVPs) by the use of plugins. -The plugin acts as the integration between complyctl and the PVPs native interface. -Each plugin is responsible for converting the policy content described in OSCAL into the input format expected by the PVP. -In addition, the plugin converts the raw results provided by the PVP into the schema used by complyctl to generate OSCAL output. - -Plugins communicate with complyctl via gRPC and can be authored using any preferred language. -The plugin acts as the gRPC server while the complyctl CLI acts as the client. -When a `complyctl` command is run, it invokes the appropriate method served by the plugin. - -Complyctl is built on [compliance-to-policy-go](https://github.com/oscal-compass/compliance-to-policy-go/ which provides a flexible plugin framework for leveraging OSCAL with various PVPs. For developers choosing Golang, the same SDK can be used for plugin authoring. - -## Plugin Discovery - -Complyctl performs automated plugin discovery using the compliance-to-policy-go [plugin manager](https://github.com/complytime/compliance-to-policy-go/blob/CPLYTM-272/plugin/discovery.go). -Plugins are defined using manifest files placed in the `c2p-plugins` directory. -The plugin manifest is a JSON file that provides metadata about the plugin. -Check the quick start [guide](https://github.com/complytime/complyctl/blob/main/docs/QUICK_START.md) to see an example. 
- -**Note:** the plugin manifest file must have the following syntax for automatic discovery: `c2p--manifest.json` - -### Example Plugin Manifest - -``` -{ - “id”: “myplugin”, - “description”: “my example plugin”, - “version”: “0.1”, - “type”: [“pvp”], - “executablePath”: "myplugin" // in relation to the plugin directory - “sha256”: “23f…” // sha256 of executable - "configuration": [ - { - "name": "config_name", - "description": "Config description", - "default": "default_value", - "required": true - }, - ] -} -``` - -### Directory Naming Conventions - -In order to support automated aggregation of output files from multiple plugins the following directory names are expected by complyctl : - -**Note:** The `workspace` path will be provided by complyctl via the [configuration](https://github.com/complytime/complyctl/blob/6cf2e92aff852119bba83e579e2c6d8700e4bcec/internal/complytime/plugins.go#L72) and represents the user's desired working directory for all complyctl activities. - -- `{workspace}/{plugin name}/results` # files for evidence collection -- `{workspace}/{plugin name}/remediations` # files for automated remediation - -### Plugin Selection - -Complyctl generates a mapping of plugins to validation components at runtime. -This mapping uses the `title` of the validation component to find a matching plugin with that ID (defined in manifest). - -```json -{ - ... - “uuid”: “701c7...”, - “type”: “validation, - “title”: “myplugin”, // name must match plugin ID in manifest -} -``` - -## Example - -Below shows an example template for authoring a Golang plugin. - -```go - -import "github.com/oscal-compass/compliance-to-policy-go/v2/policy" - -type PluginServer struct {} - -func (s PluginServer) Generate(p policy.Policy) error { - - // PluginServer should implement the Generate() method to provide logic for - // translating OSCAL to the PVPs expected input format. Note: this may not be - // applicable to all PVPs. 
- -} - -func (s PluginServer) GetResults(p policy.Policy) (policy.PVPResult, error) { - - // PluginServer should implement the GetResults() method to provide logic to - // collect results from the PVP for a given policy. Note: if the PVP requires input - // from Generate() then the policy input here may be ignored. - -} -``` diff --git a/content/docs/projects/complyctl/quick-start.md b/content/docs/projects/complyctl/quick-start.md deleted file mode 100644 index e729f9a..0000000 --- a/content/docs/projects/complyctl/quick-start.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -description: Get up and running with complyctl in minutes. -title: Quick Start -weight: 20 ---- - - -# Quick Start - -To get started with the `complyctl` CLI, at least one plugin must be installed with a corresponding OSCAL [Component Definition](https://pages.nist.gov/OSCAL/learn/concepts/layer/implementation/component-definition/). - -> Note: Some of these steps are manual. The [quick_start.sh](https://github.com/complytime/complyctl/blob/main/scripts/quick_start/quick_start.sh) automates the process below. - -## Step 1: Install Complyctl - -See [INSTALLATION.md](https://github.com/complytime/complyctl/blob/main/docs/INSTALLATION.md) - -## Step 2: Add configuration - -After running `complyctl list` for the first time, the complytime -directory should be created under $HOME/.local/share - -```markdown -complytime -├── bundles -└── plugins -└── controls -``` - -You will need an OSCAL Component Definition that defines an OSCAL Component for your target system and an OSCAL Component the corresponding -policy validation plugin. See `docs/samples/` for example configuration for the `myplugin` plugin. - -```bash -cp docs/samples/sample-component-definition.json ~/.local/share/complytime/bundles -cp docs/samples/sample-profile.json docs/samples/sample-catalog.json ~/.local/share/complytime/controls -``` - -## Step 3: Install a plugin - -Each plugin requires a plugin manifest. 
For more information about plugin discovery see [PLUGIN_GUIDE.md](https://github.com/complytime/complyctl/blob/main/docs/PLUGIN_GUIDE.md). - -```bash -plugin_dir="$HOME/.local/share/complytime/plugins" -cp "bin/openscap-plugin" "docs/samples/c2p-openscap-manifest.json" "$plugin_dir" -checksum=$(sha256sum ~/.local/share/complytime/plugins/openscap-plugin | awk '{ print $1 }' ) -version=$(bin/complyctl version | head -n1 | awk '{ print $2 }' | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*/\1/') -sed -i -e "s|checksum_placeholder|$checksum|" -e "s|version_placeholder|$version|" "$plugin_dir/c2p-openscap-manifest.json" -``` - -## Step 4: Edit plugin configuration (optional) -```bash -mkdir -p /etc/complyctl/config.d -cp ~/.local/share/complytime/plugins/c2p-openscap-manifest.json /etc/complyctl/config.d -``` - -Edit `/etc/complyctl/config.d/c2p-openscap-manifest.json` to keep only the desired changes. e.g.: -```json -{ - "configuration": [ - { - "name": "policy", - "default": "custom_tailoring_policy.xml", - }, - { - "name": "arf", - "default": "custom_arf.xml", - }, - { - "name": "results", - "default": "custom_results.xml", - } - ] -} -``` - -### Using with the openscap-plugin - -If using the openscap-plugin, there are two prerequisites: -- **openscap-scanner** package installed -- **scap-security-guide** package installed diff --git a/content/docs/projects/complyscribe/_index.md b/content/docs/projects/complyscribe/_index.md deleted file mode 100644 index eae3b11..0000000 --- a/content/docs/projects/complyscribe/_index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -description: A workflow automation tool for compliance content authoring. -title: complyscribe -weight: 10 ---- - - -# complyscribe - -ComplyScribe is a CLI tool that assists users in leveraging [Compliance-Trestle](https://github.com/oscal-compass/compliance-trestle) in CI/CD workflows for [OSCAL](https://github.com/usnistgov/OSCAL) formatted compliance content management. 
- -> WARNING: This project is currently under initial development. APIs may be changed incompatibly from one commit to another. - -## Getting Started - -### Available Commands - -The `autosync` command will sync trestle-generated Markdown files to OSCAL JSON files in a trestle workspace. All content under the provided markdown directory will be transformed when the action is run. This action supports all top-level models [supported by compliance-trestle for authoring](https://oscal-compass.github.io/compliance-trestle/tutorials/ssp_profile_catalog_authoring/ssp_profile_catalog_authoring/). - -The `rules-transform` command can be used when managing [OSCAL Component Definitions](https://pages.nist.gov/OSCAL-Reference/models/v1.1.1/component-definition/json-outline/) in a trestle workspace. The action will transform rules defined in the rules YAML view to an OSCAL Component Definition JSON file. - -The `create compdef` command can be used to create a new [OSCAL Component Definition](https://pages.nist.gov/OSCAL-Reference/models/v1.1.1/component-definition/json-outline/) in a trestle workspace. The action will create a new Component Definition JSON file and corresponding directories that contain rules YAML files and trestle-generated Markdown files. This action prepares the workspace for use with the `rules-transform` and `autosync` actions. - -The `sync-upstreams` command can be used to sync and validate upstream OSCAL content stored in a git repository to a local trestle workspace. The inputs `include_models` and `exclude_models` determine which content is synced to the trestle workspace. - -The `create ssp` command can be used to create a new [OSCAL System Security Plans](https://pages.nist.gov/OSCAL-Reference/models/v1.1.1/system-security-plan/json-outline/) (SSP) in a trestle workspace. The action will create a new SSP JSON file and corresponding directories that contain trestle-generated Markdown files. 
This action prepares the workspace for use with the `autosync` action by creating or updating the `ssp-index.json` file. The `ssp-index.json` file is used to track the relationships between the SSP and the other OSCAL content in the workspace for the `autosync` action. - -The `sync-cac-content` command supports transforming the [CaC content](https://github.com/ComplianceAsCode/content) to OSCAL models in a trestle workspace. For detailed documentation on how to use, see the [sync-cac-content.md](https://github.com/complytime/complyscribe/blob/main/docs/tutorials/sync-cac-content.md). - -The `sync-oscal-content` command supports sync OSCAL models to the [CaC content](https://github.com/ComplianceAsCode/content) in a trestle workspace. For detailed documentation on how to use, see the [sync-oscal-content.md](https://github.com/complytime/complyscribe/blob/main/docs/tutorials/sync-oscal-content.md). - - -Below is a table of the available commands and their current availability as a GitHub Action: - -| Command | Available as a GitHub Action | -|-------------------------------------------|------------------------------| -| `autosync` | ✓ | -| `rules-transform` | ✓ | -| `create compdef` | ✓ | -| `sync-upstreams` | ✓ | -| `create ssp` | | -| `sync-cac-content component-definition` | | -| `sync-cac-content profile` | | -| `sync-cac-content catalog` | | -| `sync-oscal-content component-definition` | | -| `sync-oscal-content profile` | | -| `sync-oscal-content catalog` | | - - -For detailed documentation on how to use each action, see the README.md in each folder under [actions](https://github.com/complytime/complyscribe/blob/main/actions). - - -### Supported Git Providers - -> Note: Only applicable if using `complyscribe` to create pull requests. Automatically detecting the git -provider information is supported for GitHub Actions (GitHub) and GitLab CI (GitLab). 
- -- GitHub -- GitLab - -### Run as a Container - -> Note: When running the commands in a container, all are prefixed with `complyscribe` (e.g. `complyscribe autosync`). The default entrypoint for the container is the autosync command. - -Build and run the container locally: - -```bash -podman build -f Dockerfile -t complyscribe . -podman run -v $(pwd):/data -w /data complyscribe -``` - -Container images are available in `quay.io`: - -```bash -podman run -v $(pwd):/data -w /data quay.io/continuouscompliance/complyscribe: -``` - -## Contributing - -For information about contributing to complyscribe, see the [CONTRIBUTING.md](https://github.com/complytime/complyscribe/blob/main/CONTRIBUTING.md) file. - -## License - -This project is licensed under the Apache 2.0 License - see the [LICENSE.md](https://github.com/complytime/complyscribe/blob/main/LICENSE) file for details. - -## Troubleshooting - -See [TROUBLESHOOTING.md](https://github.com/complytime/complyscribe/blob/main/TROUBLESHOOTING.md) for troubleshooting tips. diff --git a/content/docs/projects/complyscribe/troubleshooting.md b/content/docs/projects/complyscribe/troubleshooting.md deleted file mode 100644 index 17ea6b3..0000000 --- a/content/docs/projects/complyscribe/troubleshooting.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -description: Common issues and solutions for complyscribe. -title: Troubleshooting -weight: 30 ---- - - -Check [TROUBLESHOOTING.md](https://github.com/complytime/complyscribe/blob/main/TROUBLESHOOTING.md) \ No newline at end of file diff --git a/content/docs/projects/complyscribe/tutorials/authoring.md b/content/docs/projects/complyscribe/tutorials/authoring.md deleted file mode 100644 index 6919f1c..0000000 --- a/content/docs/projects/complyscribe/tutorials/authoring.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -description: Tutorial on authoring compliance content with complyscribe. 
-title: Authoring Content -weight: 41 ---- - - -# Authoring Tutorial - -This tutorial provides an overview of the authoring process using `complyscribe`. We will use the component definition created in the [GitHub tutorial](https://redhatproductsecurity.github.io/complyscribe/tutorials/github/) as our starting point. This tutorial will demonstrate the workflow for updating Markdown content and syncing those changes to OSCAL. - -## 1. Prerequisites - -- Complete the [GitHub tutorial](https://complytime.github.io/complyscribe/tutorials/github/) - - -## 2. Edit in Markdown - -We will begin where we left off at the end of the [GitHub tutorial](https://redhatproductsecurity.github.io/complyscribe/tutorials/github/). Our repository has a newly created component definition named `my-first-compdef` with corresponding content in the `markdown/` and `component-definitions/` directories. We will now demonstrate how to author changes in Markdown and produce updated OSCAL content. - -1. Navigate to the `markdown/component-definitions/my-first-compdef/test-component/nist_rev5_800_53/ac` directory and select the `ac-1.md` file. -2. Click the `Edit this file` (pencil) icon. -3. Scroll down to the section titled `## What is the solution and how is it implemented?` and add a new line of text with a brief comment. For example: - -``` -## What is the solution and how is it implemented? - -Here is where details should be added by the author. -``` - -4. Click the `Commit changes..` button -5. Select the `Create a new branch for this commit and start a pull request` radio button -6. Click `Propose changes` - - -The `Open a pull request` page now opens. Enter any additional details about your changes into the description box. - -7. Click `Create pull request` -8. For demo purposes, we will go ahead and merge the pull request ourselves. In a production setting the pull request process should be used for review, discussion and approval of the proposed changes. 
Click `Merge pull request` and then `Confirm merge`. - - -## Autosync - -Once the pull request has been merged the `complyscribe rules-transform and autosync` GitHub action will be triggered. We will now validate that action was successful. - -1. Navigate to the `Actions` tab of your GitHub repository. -2. The top entry in the list of workflow runs should be titled `Merge pull request #<PR number> from <branch name>`. This action should be either running or have just successfully completed. -3. [Optional] Clicking this entry will allow you to view the detailed steps and log output. -4. Once the action is completed successfully, navigate back to the source code by clicking the `Code` tab of the repo. -5. Click the `component-definitions` folder and navigate to `my-first-compdef/component-definition.json`. -6. The `Last commit date` should align with the time the action completed. -7. Click the `component-definition.json` file and then click the `History` icon to view the commit history. -8. Ensure the latest commit performed by the GitHub action reflects the changes made in Markdown as shown below: - -``` - "description": "", - "description": "Here is where details should be added by the author", -``` - -You will also notice the `"last-modified"` timestamp has been updated. - - -Congrats! You've successfully authored a change by modifying a Markdown file and letting complyscribe sync those changes back to the OSCAL content. - diff --git a/content/docs/projects/complyscribe/tutorials/github.md b/content/docs/projects/complyscribe/tutorials/github.md deleted file mode 100644 index 62a3c31..0000000 --- a/content/docs/projects/complyscribe/tutorials/github.md +++ /dev/null @@ -1,136 +0,0 @@ ---- -description: Using complyscribe with GitHub Actions. -title: GitHub Integration -weight: 42 ---- - - -# GitHub Tutorial - -This tutorial provides an introduction to using `complyscribe` with GitHub. 
We will be using a single GitHub repository for our trestle authoring workspace and executing the `complyscribe` commands as GitHub actions. Note, each repo is intended to support authoring a single OSCAL model type (SSP, component definition, etc.). If authoring more than one OSCAL model type, then a dedicated repository should be used for each model. - - -### 1. Prerequisites - -Before moving on, please ensure the following is completed: - -1. Create a new (or use an existing) empty GitHub repository -2. Clone the repo to a local workstation -3. Install complyscribe - * Option 1: Clone the [complyscribe](https://github.com/complytime/complyscribe/tree/main) repo to a local workstation and run `poetry install` - * Option 2: Use the [complyscribe container image](https://github.com/complytime/complyscribe?tab=readme-ov-file#run-as-a-container) - - -### 2. Set Permissions for GitHub Actions - -The `complyscribe` commands will be run inside of GitHub actions. These commands often perform `write` level operations against the repo contents. The GitHub workflows generated in this tutorial make use of [automatic token authentication.](https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication) To ensure this is configured correctly the following repo settings need to be in place. - -*Note: If an alternative method is chosen to provide repo access, such as personal access tokens or GitHub apps, the following steps can be skipped.* - -1. Click the `Settings` tab for your GitHub repo -2. Select `Actions` -> `General` from the left-hand menu -3. Scroll down to `Workflow permissions` -4. Ensure `Read repository contents and packages permissions` is selected -5. Ensure `Allow GitHub Actions to create and approve pull requests` is checked - - -### 3. Initialize complyscribe Workspace - -The `complyscribe init` command will initialize the empty GitHub repository. 
Unlike other complyscribe commands, this command is run on the local workstation. The complyscribe commands can be installed by cloning the [complyscribe](https://github.com/complytime/complyscribe/tree/main) repo and running `poetry install`. Alternatively these commands can be run using the [complyscribe container image](https://github.com/complytime/complyscribe?tab=readme-ov-file#run-as-a-container). - -For this tutorial example, we will be authoring a component-definition. - -1a. Running complyscribe init using a locally installed complyscribe: - -``` -complyscribe init --repo-path <path-to-repo> -``` - -1b. Running complyscribe init using a complyscribe container image: - - * *Note: latest image version tag can be found in the [continuouscompliance repo on quay.io](https://quay.io/repository/continuouscompliance/complyscribe?tab=tags).* - -``` -podman run -v <path-to-repo>:/data:rw complyscribe:<tag> --oscal-model compdef --working-dir /data -``` - - * If the local workstation is in SELinux enforcing mode and a permissions error occurs, then the following command should be used instead: -``` -podman run -v <path-to-repo>:/data:Z complyscribe:<tag> --oscal-model compdef --working-dir /data -``` - - * Once the initialization runs successfully, the following directories will be created within the local copy of the repository. - -```bash -. -├── assessment-plans -├── assessment-results -├── catalogs -├── component-definitions -├── markdown -├── plan-of-action-and-milestones -├── profiles -└── system-security-plans -``` - -2. Any catalog or profile content needed for the authoring process can now be added. - - * For this example, we will add the NIST SP 800-53 Rev. 5 catalog to our `/catalogs` directory. - -``` -mkdir catalogs/nist_rev5_800_53 -wget https://raw.githubusercontent.com/usnistgov/oscal-content/release-v1.0.5-update/nist.gov/SP800-53/rev5/json/NIST_SP-800-53_rev5_catalog.json -O catalogs/nist_rev5_800_53/catalog.json -``` - - * We will also add the NIST SP 800-53 Rev. 
5 High Baseline profile to our `profiles/` directory. - -``` -mkdir profiles/nist_rev5_800_53 -wget https://raw.githubusercontent.com/usnistgov/oscal-content/release-v1.0.5-update/nist.gov/SP800-53/rev5/json/NIST_SP-800-53_rev5_HIGH-baseline_profile.json -O profiles/nist_rev5_800_53/profile.json -``` - -3. Our `profile.json` file contains a reference to our `catalog.json` file. By default, this path is not resolvable by compliance-trestle, so we need to run the following command to update the `href` value in the JSON. - -``` -sed -i 's/NIST_SP-800-53_rev5_catalog.json/trestle:\/\/catalogs\/nist_rev5_800_53\/catalog.json/g' profiles/nist_rev5_800_53/profile.json -``` - -4. Ready-made CI/CD workflows can be copied from the `.github/workflows/` directory within the upstream `trestle-demo` repository into the local trestle workspace. These are the complyscribe actions that will run as changes are made to the repo contents. - - * Copy the required template workflows from the `trestle-demo` repository into the new workspace repository. -``` -mkdir -p .github/workflows -wget -O .github/workflows/complyscribe-rules-transform.yml https://raw.githubusercontent.com/RedHatProductSecurity/trestle-demo/refs/heads/main/.github/workflows/complyscribe-rules-transform.yml -wget -O .github/workflows/complyscribe-create-component-definition.yml https://raw.githubusercontent.com/RedHatProductSecurity/trestle-demo/refs/heads/main/.github/workflows/complyscribe-create-component-definition.yml -``` - -5. ComplyScribe initial content is now created locally within the new trestle authoring workspace. This content can now be pushed to the remote GitHub repository. -``` -git add . -git commit -m "added example NIST SP 800-53 profile and component definition authoring workflow" -git push -``` - *Note: if this is the first git push to the remote GitHub repository, then use `git push -u origin main` rather than `git push`.* - - -### 4. 
Create a New Component Definition - -Now it's time to run our first complyscribe action within GitHub! We will go ahead and create our first component definition. - -1. Open the new remote GitHub repository in a web browser. -2. Click the `Actions` tab from the top menu. -3. Click the `ComplyScribe create component definition` action from the left-hand menu. -4. Click `Run Workflow` which will open up a dialog box. -5. Enter the following values: - -* _Name of the Trestle profile to use for the component definition:_ `nist_rev5_800_53` -* _Name of the component definition to create:_ `my-first-compdef` -* _Name of the component to create in the generated component definition:_ `test-component` -* _Type of the component (e.g. service, policy, physical, validation, etc.):_ `service` -* _Description of the component to create:_ `Testing complyscribe init` - -6. Click `Run Workflow` - -Once the workflow job has completed, there will be a new Pull Request containing the files complyscribe generated for the component definition. After reviewing the committed changes, the Pull Request can then be merged into the main branch! - -**Congratulations! We have successfully created a new complyscribe workspace and have an authoring environment!** diff --git a/content/docs/projects/complyscribe/tutorials/sync-cac-content.md b/content/docs/projects/complyscribe/tutorials/sync-cac-content.md deleted file mode 100644 index fd34d6b..0000000 --- a/content/docs/projects/complyscribe/tutorials/sync-cac-content.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -description: Synchronize Compliance-as-Code content with complyscribe. -title: Sync CaC Content -weight: 43 ---- - - -# The complyscribe command line sync-cac-content Tutorial - -This tutorial explains how to use `complyscribe sync-cac-content` to transform [cac-content](https://github.com/ComplianceAsCode/content) to OSCAL models. 
-This command has three sub-commands `catalog`, `profile` and `component-definition` - -> **WARNING:** There is a sequential order when transformed, first Catalog, then Profile, last Component Definition. -> Because Profile depends on Catalog, and Component Definition depends on Profile. - -## catalog - -This command is to generate OSCAL Catalog according to CaC content policy - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace) if you do not have one. - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). - -### 2. Run the CLI sync-cac-content catalog - -A real world example, if we want to transform [cis_rhel8](https://github.com/ComplianceAsCode/content/blob/master/controls/cis_rhel8.yml) -to OSCAL Catalog, we run command like below,`cac-policy-id` is [control file id](https://github.com/ComplianceAsCode/content/blob/master/controls/cis_rhel8.yml#L4), -`oscal-catalog` is OSCAL Catalog directory name we will use when generating the OSCAL Catalog. - -```shell -poetry run complyscribe sync-cac-content catalog \ ---dry-run \ ---repo-path $complyscribe_workspace_root_dir \ ---committer-email tester@redhat.com \ ---committer-name tester \ ---branch main \ ---cac-policy-id cis_rhel8 \ ---oscal-catalog cis_rhel8 \ ---cac-content-root $cac_content_root_dir -``` - -After successfully running above command, will generate [catalogs/cis_rhel8/catalog.json](https://github.com/ComplianceAsCode/oscal-content/blob/main/catalogs/cis_rhel8/catalog.json) - -For more details about these options and additional flags, you can use the `--help` flag: -`poetry run complyscribe sync-cac-content catalog --help` -This will display a full list of available options and their descriptions. - -After running the CLI with the right options, you would successfully generate an OSCAL Catalog under -`$complyscribe_workspace_root_dir/catalogs`. 
- - -## profile - -This command is to generate OSCAL Profile according to content policy - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace) if you do not have one. - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). - -### 2. Run the CLI sync-cac-content profile - -A real world example, if we want to transform [rhel8 product](https://github.com/ComplianceAsCode/content/tree/master/products/rhel8) -that using [cis_rhel8 control file](https://github.com/ComplianceAsCode/content/blob/master/controls/cis_rhel8.yml) to OSCAL Profile, -we run command like below, `product` is [product name](https://github.com/ComplianceAsCode/content/blob/master/products/rhel8/product.yml#L1), -`oscal-catalog` is OSCAL [catalog directory name](https://github.com/ComplianceAsCode/oscal-content/tree/main/catalogs/cis_rhel8), -`cac-policy-id` is [control file id](https://github.com/ComplianceAsCode/content/blob/master/controls/cis_rhel8.yml#L4) - -```shell -poetry run complyscribe sync-cac-content profile \ ---dry-run \ ---repo-path $complyscribe_workspace_root_dir \ ---committer-email tester@redhat.com \ ---committer-name tester \ ---branch main \ ---cac-content-root $cac_content_root_dir \ ---product rhel8 \ ---oscal-catalog cis_rhel8 \ ---cac-policy-id cis_rhel8 -``` - -After successfully running above command, you will generate four OSCAL -Profiles([rhel8-cis_rhel8-l1_server](https://github.com/ComplianceAsCode/oscal-content/blob/main/profiles/rhel8-cis_rhel8-l1_server/profile.json) -,[rhel8-cis_rhel8-l2_server](https://github.com/ComplianceAsCode/oscal-content/blob/main/profiles/rhel8-cis_rhel8-l2_server/profile.json), -[rhel8-cis_rhel8-l1_workstation](https://github.com/ComplianceAsCode/oscal-content/blob/main/profiles/rhel8-cis_rhel8-l1_workstation/profile.json), 
-[rhel8-cis_rhel8-l2_workstation](https://github.com/ComplianceAsCode/oscal-content/blob/main/profiles/rhel8-cis_rhel8-l2_workstation/profile.json)), -every [level](https://github.com/ComplianceAsCode/content/blob/master/controls/cis_rhel8.yml#L8) has its own Profile. - -For more details about these options and additional flags, you can use the `--help` flag: -`poetry run complyscribe sync-cac-content profile --help` -This will display a full list of available options and their descriptions. - -After running the CLI with the right options, you would successfully generate an OSCAL Profile -under `$complyscribe_workspace_root_dir/profiles/$product_$cac-policy-id_$level`. - -## component-definition - -This command creates OSCAL Component Definitions by transforming CaC content control files. - -The CLI performs the following transformations: - -- Populate CaC product information to OSCAL component title and description -- Ensure OSCAL component control mappings are populated with rule and rule parameter data from CaC control files -- Create a validation component from SSG rules to check mappings -- Ensure OSCAL Component Definition implemented requirements are populated from control notes in the control file -- Ensure implementation status of an implemented requirement in OSCAL Component Definitions are populated with the status from CaC control files - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace). - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). - -### 2. Run the CLI sync-cac-content component-definition - -A real world example. If we want to transform [cis_server_l1.profile](https://github.com/ComplianceAsCode/content/blob/master/products/rhel8/profiles/cis_server_l1.profile) -to an OSCAL Component Definition, we run command like below. 
`product` is [product name](https://github.com/ComplianceAsCode/content/blob/master/products/rhel8/product.yml#L1), -`cac-profile` is [CaC content profile file name](https://github.com/ComplianceAsCode/content/blob/master/products/rhel8/profiles/cis_server_l1.profile) you need transform, -`oscal-profile` is [OSCAL profile directory name](https://github.com/ComplianceAsCode/oscal-content/blob/main/profiles/rhel8-cis_rhel8-l1_server/profile.json) corresponding -to CaC content profile, `component-definition-type` is [a category describing the purpose of the component](https://pages.nist.gov/OSCAL-Reference/models/v1.1.3/component-definition/json-reference/#/component-definition/components/type). - -```shell -poetry run complyscribe sync-cac-content component-definition \ ---dry-run \ ---repo-path $complyscribe_workspace_root_dir \ ---committer-email tester@redhat.com \ ---committer-name tester \ ---branch main \ ---cac-content-root $cac_content_root_dir \ ---product rhel8 \ ---component-definition-type software \ ---oscal-profile rhel8-cis_rhel8-l1_server \ ---cac-profile cis_server_l1 -``` - -After successfully running above command, will generate an OSCAL [Component Definition](https://github.com/ComplianceAsCode/oscal-content/blob/main/component-definitions/rhel8/rhel8-cis_rhel8-l1_server/component-definition.json) - -For more details about these options and additional flags, you can use the `--help` flag: -`poetry run complyscribe sync-cac-content component-definition --help` -This will display a full list of available options and their descriptions. - -After running the CLI with the right options, you would successfully generate an OSCAL Component Definition -under $complyscribe_workspace_root_dir/component-definitions/$product_name/$OSCAL-profile-name. 
diff --git a/content/docs/projects/complyscribe/tutorials/sync-oscal-content.md b/content/docs/projects/complyscribe/tutorials/sync-oscal-content.md deleted file mode 100644 index de4f78e..0000000 --- a/content/docs/projects/complyscribe/tutorials/sync-oscal-content.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -description: Synchronize OSCAL content with complyscribe. -title: Sync OSCAL Content -weight: 44 ---- - - -# The complyscribe command line sync-oscal-content Tutorial - -This tutorial explains how to use `complyscribe sync-oscal-content` to sync OSCAL models to [cac-content](https://github.com/ComplianceAsCode/content). - -Currently, this command has three sub-commands: `component-definition`, `profile`, and `catalog` - -## component-definition - -This command is to sync OSCAL Component Definition information to CaC content side. - -The CLI performs the following sync: - -- Sync OSCAL Component Definition parameters/rules changes to CaC content profile file -- Sync OSCAL Component Definition parameters/rules changes to CaC content control file -- Add a hint comment to the control file when a missing rule is found in the CaC content repo. -- Sync OSCAL Component Definition control status changes to CaC content control file. Since status mapping between -cac and OSCAL is many-to-many relationship, if status can not be determined when sync, then add a comment to let user -decide. Discussion detail in [doc](https://github.com/complytime/complyscribe/discussions/511) -- Add new option to cac var file when found variable exists but missing the option we sync. -- Sync OSCAL Component Definition statements field to CaC control notes field - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace). - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). 
- -- Has an OSCAL Component Definition file, (transformed from CaC content using `sync-cac-content component-definition` cmd) - -### 2. Run the CLI sync-oscal-content component-definition -```shell -poetry run complyscribe sync-oscal-content component-definition \ ---branch main \ ---cac-content-root $cac_content_root_dir \ ---committer-name tester \ ---committer-email tester@redhat.com \ ---dry-run \ ---repo-path $complyscribe_workspace_root_dir \ ---product $product-name \ ---oscal-profile $oscal-profile-name -``` - -For more details about these options and additional flags, you can use the --help flag: -`poetry run complyscribe sync-oscal-content component-definition --help` -This will display a full list of available options and their descriptions. - - -## profile - -This command is to sync OSCAL Profile information to CaC content side. - -The CLI performs the following sync: - -- Sync OSCAL Profile control levels change to CaC control files - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace). - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). - -- Have OSCAL Profile file, (transformed from CaC content using `sync-cac-content profile` cmd) - -### 2. Run the CLI sync-oscal-content profile - -```shell -poetry run complyscribe sync-oscal-content profile \ ---dry-run \ ---repo-path $complyscribe_workspace_root_dir \ ---committer-email tester@redhat.com \ ---committer-name tester \ ---branch main \ ---cac-content-root $cac_content_root_dir \ ---cac-policy-id cis_rhel8 \ ---product rhel8 -``` - -For more details about these options and additional flags, you can use the --help flag: -`poetry run complyscribe sync-oscal-content profile --help` -This will display a full list of available options and their descriptions. - -## catalog - -This command is to sync OSCAL Catalog information to CaC content side. 
- -The CLI performs the following sync: - -- Sync OSCAL Catalog control parts field change to CaC control files control description field - -### 1. Prerequisites - -- Initialize the [complyscribe workspace](../tutorials/github.md#3-initialize-complyscribe-workspace). - -- Clone the [cac-content repository](https://github.com/ComplianceAsCode/content). - -- An OSCAL Catalog file, (transformed from CaC content using `sync-cac-content catalog` cmd) - -### 2. Run the CLI sync-oscal-content catalog -```shell -poetry run complyscribe sync-oscal-content catalog \ ---cac-policy-id nist_ocp4 \ ---cac-content-root $cac_content_root_dir \ ---repo-path $complyscribe_workspace_root_dir \ ---committer-name tester \ ---committer-email tester@redhat.com \ ---branch main \ ---dry-run -``` - -For more details about these options and additional flags, you can use the --help flag: -`poetry run complyscribe sync-oscal-content catalog --help` -This will display a full list of available options and their descriptions. \ No newline at end of file diff --git a/content/docs/projects/complytime-collector-components/_index.md b/content/docs/projects/complytime-collector-components/_index.md deleted file mode 100644 index 98c87ba..0000000 --- a/content/docs/projects/complytime-collector-components/_index.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -description: OpenTelemetry-based observability toolkit for compliance evidence collection. -title: complytime-collector-components -weight: 10 ---- - - -# ComplyBeacon - -**ComplyBeacon** is an open-source observability toolkit designed to collect, normalize, and enrich compliance evidence, extending the OpenTelemetry (OTEL) standard. - -By bridging the gap between raw policy scanner output and modern logging pipelines, it provides a unified, enriched, and auditable data stream for security and compliance analysis. - ---- - -⚠️ **WARNING:** All components are under initial development and are **not** ready for production use. 
- ---- - -## The ComplyBeacon Architecture - -ComplyBeacon is a policy-driven observability toolkit composed of four main components that work together to process and enrich compliance data. - -### 1. ProofWatch - -An instrumentation library that accepts and emits pre-normalized compliance evidence as an OpenTelemetry log stream, while also instrumenting metrics for real-time observability. - -### 2. Beacon - -A custom OpenTelemetry Collector distribution that acts as the pipeline's host, receiving log records from ProofWatch and preparing them for the next stage of enrichment. - -### 3. TruthBeam - -A custom OpenTelemetry Collector processor that enriches log records with compliance and risk data by integrating with the Compass service. - -### 4. Compass - -A central enrichment service that provides risk, threat, and compliance framework attributes based on policy lookup data. - -#### Supported Compass Mappers - -| Mapper | Description | -|---------|----------------------------------------------------| -| `basic` | Maps to the `gemara` model based on log attributes | - -## Quick Start - -Before Deploying: Please read the following **NOTE**. - -⚠️ **NOTE:** -To enable evidence log synchronization to AWS S3 and Hyperproof, you must configure the following environment variables. The collector will fail to start if the S3 configuration is invalid. - -For more detailed information, please refer to the integration guide: [Sync_Evidence2Hyperproof](https://github.com/complytime/complytime-collector-components/blob/main/docs/integration/Sync_Evidence2Hyperproof.md). - -| Environment Variable | Description | -|------------------------|---------------------------------------------------------| -| `AWS_REGION` | The AWS region where your S3 bucket is hosted | -| `S3_BUCKETNAME` | The name of the target S3 bucket. 
| -| `S3_OBJ_DIR` | The folder path (prefix) for bucket subjects | -| `AWS_ACCESS_KEY_ID` | The AWS Access Key ID with permissions to the bucket | -| `AWS_SECRET_ACCESS_KEY`| The AWS Secret Access Key corresponding to the ID. | - - -If you do not wish to use the AWS S3 integration, you can disable it by modifying the configuration files: - -A. **In [hack/demo/demo-config.yaml](https://github.com/complytime/complytime-collector-components/blob/main/hack/demo/demo-config.yaml)** change the exporters line from: - -`exporters: [debug, otlphttp/logs, awss3/logs, signaltometrics]` - -to - -`exporters: [debug, otlphttp/logs, signaltometrics]` - -The `awss3/logs` configuration in `exporters` section should also be commented. - -```yaml -exporters: - debug: - verbosity: detailed - otlphttp/logs: - endpoint: "http://loki:3100/otlp" - tls: - insecure: true - # File exporter: writes metrics as JSON for filelog receiver - file/metrics: - path: /data/metrics.jsonl - format: json - awss3/logs: - s3uploader: - region: ${AWS_REGION} - s3_bucket: ${S3_BUCKETNAME} - s3_prefix: ${S3_OBJ_DIR} - s3_partition_format: "" -``` - -to - -```yaml -exporters: - debug: - verbosity: detailed - otlphttp/logs: - endpoint: "http://loki:3100/otlp" - tls: - insecure: true - # File exporter: writes metrics as JSON for filelog receiver - file/metrics: - path: /data/metrics.jsonl - format: json -# awss3/logs: -# s3uploader: -# region: ${AWS_REGION} -# s3_bucket: ${S3_BUCKETNAME} -# s3_prefix: ${S3_OBJ_DIR} -# s3_partition_format: "" -``` - -B. **Comment collector.environment part of [compose.yml](https://github.com/complytime/complytime-collector-components/blob/main/compose.yaml)** as the AWS S3 environment variables will no longer be needed. - -Once you've reviewed the **NOTE** above, follow these steps to deploy the infrastructure and test the pipeline. - -1. 
**Deploy the Stack:** - This command builds and starts the full infrastructure, including Grafana, Loki, the custom collector (`Beacon`), and the `Compass` service. - ```bash - podman-compose up --build - ``` - -2. **Test the Pipeline:** - Send sample compliance data to the webhook receiver to test the pipeline's functionality. - ```bash - curl -X POST http://localhost:8088/eventsource/receiver -H "Content-Type: application/json" -d @hack/sampledata/evidence.json - ``` - -3. **Enable grafana dashboard:** - If you want to configure loki as default datasource on grafana and enable pre-build grafana dashboard, refer to [README.md](https://github.com/complytime/complytime-collector-components/blob/main/hack/demo/terraform/README.md) - -## Project Design - -For additional details on the planned design and roadmap, see [`DESIGN.md`](https://github.com/complytime/complytime-collector-components/blob/main/docs/DESIGN.md). - -## Updating the Semantic Conventions - -Update semantic convention under `model/` - -Validate with `make weaver-check` - -Update docs and code: -`make weaver-docsgen` -`make weaver-codegen` - ---- diff --git a/content/docs/projects/complytime-collector-components/attributes/compliance.md b/content/docs/projects/complytime-collector-components/attributes/compliance.md deleted file mode 100644 index 502e5d2..0000000 --- a/content/docs/projects/complytime-collector-components/attributes/compliance.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -description: Reference for compliance-related OpenTelemetry attributes. -title: Compliance Attributes -weight: 41 ---- - - - - - -# Compliance - -## Compliance Assessment Attributes - -Attributes added by compliance assessment tools to map policy results to compliance frameworks. Provides compliance context, risk assessment, and regulatory mapping for audit and reporting. Maps to GEMARA Layer 5 (Enforcement) for Policy-as-Code workflows. 
- -| Attribute | Type | Description | Examples | Stability | -|---|---|---|---|---| -| `compliance.assessment.id` | string | Unique identifier for the compliance assessment run or session. Used to group findings from the same assessment execution. | `assessment-2024-001`; `scan-run-abc123`; `compliance-check-xyz789` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.control.applicability` | string[] | Environments or contexts where this control applies. | `["Production", "Staging"]`; `["All Environments"]`; `["Kubernetes", "AWS"]` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.control.catalog.id` | string | Unique identifier for the security control catalog or framework. | `OSPS-B`; `CCC`; `CIS` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.control.category` | string | Category or family that the security control belongs to. | `Access Control`; `Quality` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.control.id` | string | Unique identifier for the security control and assessment requirement being assessed. | `OSPS-QA-07.01` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.enrichment.status` | string | Result of the compliance framework mapping and enrichment process, indicating whether compliance context was successfully added to the event. | `Success`; `Unmapped`; `Partial` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.frameworks` | string[] | Regulatory or industry standards being evaluated for compliance. | `["NIST-800-53", "ISO-27001"]` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.remediation.action` | string | Remediation action determined by the policy engine in response to the compliance assessment result. 
| `Block`; `Allow`; `Remediate` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.remediation.description` | string | Description of the recommended remediation strategy for this control. | `This is a short description of the remediation strategy for this control.` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.remediation.exception.active` | boolean | Whether the exception is active for this enforcement. | `true`; `false` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.remediation.exception.id` | string | Unique identifier for the approved exception, if applicable. | `EX-2025-10-001`; `WAIVE-AC-1-001` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.remediation.status` | string | Outcome of the remediation action execution, indicating whether the remediation was successfully applied. | `Success`; `Fail`; `Skipped` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.requirements` | string[] | Compliance requirement identifiers from the frameworks impacted. | `["AC-1", "A.9.1.1"]` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.risk.level` | string | Severity classification of the risk posed by non-compliance with the control requirement. | `Critical`; `High`; `Medium` | ![Development](https://img.shields.io/badge/-development-blue) | -| `compliance.status` | string | Overall compliance determination for the assessed resource or control, indicating whether it meets the compliance requirements. | `Compliant`; `Non-Compliant`; `Exempt` | ![Development](https://img.shields.io/badge/-development-blue) | - ---- - -`compliance.enrichment.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. 
- -| Value | Description | Stability | -|---|---|---| - ---- - -`compliance.remediation.action` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| - ---- - -`compliance.remediation.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| - ---- - -`compliance.risk.level` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| - ---- - -`compliance.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| diff --git a/content/docs/projects/complytime-collector-components/attributes/policy.md b/content/docs/projects/complytime-collector-components/attributes/policy.md deleted file mode 100644 index 059da03..0000000 --- a/content/docs/projects/complytime-collector-components/attributes/policy.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -description: Reference for policy-related OpenTelemetry attributes. -title: Policy Attributes -weight: 42 ---- - - - - - -# Policy - -## Policy Engine Attributes - -Attributes emitted by policy engines (OPA, Gatekeeper, etc.) during policy evaluation and enforcement. Maps to GEMARA Layer 4 (Evaluation) for Policy-as-Code workflows. - -| Attribute | Type | Description | Examples | Stability | -|---|---|---|---|---| -| `policy.engine.name` | string | Name of the policy engine that performed the evaluation or enforcement action. 
| `OPA`; `Gatekeeper`; `Conftest`; `Sentinel` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.engine.version` | string | Version of the policy engine. | `v3.14.0`; `v0.45.0`; `v1.2.3`; `v2.0.1` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.evaluation.message` | string | Additional context about the policy evaluation result. | `The policy evaluation failed due to a missing attribute.` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.evaluation.result` | string | Outcome of the policy rule evaluation, indicating the result of the policy check. | `Not Run`; `Passed`; `Failed` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.rule.id` | string | Unique identifier for the policy rule being evaluated or enforced. | `deny-root-user`; `require-encryption`; `check-labels` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.rule.name` | string | Human-readable name of the policy rule. | `Deny Root User`; `Require Encryption`; `Check Resource Labels` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.rule.uri` | string | Source control URL and version of the policy-as-code file for auditability. | `github.com/org/policy-repo/b8a7c2e`; `gitlab.com/company/policies@v1.2.3` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.target.environment` | string | Environment where the target resource or entity exists. | `production`; `staging`; `development` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.target.id` | string | Unique identifier for the resource or entity being evaluated or enforced against. | `deployment-123`; `resource-456`; `user-789` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.target.name` | string | Human-readable name of the resource or entity being evaluated or enforced against. 
| `frontend-deployment`; `s3-bucket-secrets`; `admin-user` | ![Development](https://img.shields.io/badge/-development-blue) | -| `policy.target.type` | string | Type of the resource or entity being evaluated or enforced against. | `deployment`; `resource`; `user`; `configuration` | ![Development](https://img.shields.io/badge/-development-blue) | - ---- - -`policy.evaluation.result` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used. - -| Value | Description | Stability | -|---|---|---| diff --git a/content/docs/projects/complytime-collector-components/design.md b/content/docs/projects/complytime-collector-components/design.md deleted file mode 100644 index 268b9b3..0000000 --- a/content/docs/projects/complytime-collector-components/design.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -description: Architecture and design decisions for Collector Components. -title: Design -weight: 20 ---- - - -# ComplyBeacon Design Documentation - -## Key Features - -- **OpenTelemetry Native**: Built on the OpenTelemetry standard for seamless integration with existing observability pipelines. -- **Automated Enrichment**: Enriches raw evidence with risk scores, threat mappings, and regulatory requirements via the Compass service. -- **Composability**: Components are designed as a toolkit; they are not required to be used together, and users can compose their own pipelines. -- **Compliance-as-Code**: Leverages the `gemara` model for a robust, auditable, and automated approach to risk assessment. - -## Architecture Overview - -### Design Principles - -* **Modularity:** The system is composed of small, focused, and interchangeable services. - -* **Standardization:** The architecture is built on OpenTelemetry to ensure broad compatibility and interoperability. 
- -* **Operational Experience:** The toolkit is built for easy deployment, configuration, and maintenance using familiar cloud-native practices and protocols. - - -### Data Flow - -The ComplyBeacon architecture is centered around a unified enrichment pipeline that processes and enriches compliance evidence. The primary data flow begins with a source that generates OpenTelemetry-compliant logs. - -1. **Log Ingestion**: A source generates compliance evidence and sends it as a structured log record to the `Beacon` collector, typically using `ProofWatch` to handle the emission. This can also be done by an OpenTelemetry collector agent. -2. **Enrichment Request**: The log record is received by the `Beacon` collector and forwarded to the `truthbeam` processor. `truthbeam` extracts key attributes from the record and sends an enrichment request to the `Compass` API. -3. **Enrichment Lookup**: The `Compass` service performs a lookup based on the provided attributes and returns a response containing compliance-related context (e.g., impacted baselines, requirements, and risk). -4. **Attribute Injection**: `truthbeam` adds these new attributes from `Compass` to the original log record. -5. **Export**: The now-enriched log record is exported from the `Beacon` collector to a final destination (e.g., a SIEM, logging backend, or data lake) for analysis and correlation. 
- -``` -┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ │ -│ │ -│ ┌─────────────────────────┐ │ -│ │ │ │ -│ │ Beacon Collector Distro │ │ -│ ┌────────────────────┐ ┌───────────────────┐ │ │ │ -│ │ │ │ │ ├─────────────────────────┤ │ -│ │ ├───┤ ProofWatch ├───┼────┐ │ │ -│ │ │ │ │ │ │ │ │ -│ │ Policy Log │ └───────────────────┘ │ ┌┴─────────────────┐ │ │ -│ │ Source App │ │ │ │ │ │ -│ │ │ │ │ OTLP │ │ │ -│ │ │ │ │ Reciever │ │ │ -│ │ │ ┌────────────────────────┼───┤ │ │ │ -│ └────────────────────┘ │ │ └────────┬─────────┘ │ ┌─────────────┐ │ -│ │ │ │ │ │ │ │ -│ │ │ ┌────────┴─────────┐ │ │ │ │ -│ │ │ │ │ │ │ Compass API │ │ -│ │ │ │ TruthBeam │──┼──────────────►│ │ │ -│ ┌───────────────────────┴───┐ │ │ Processor │ │ │ │ │ -│ │ │ │ │ │ │ └─────────────┘ │ -│ │ │ │ └────────┬─────────┘ │ │ -│ │ OpenTelemetry │ │ │ │ │ -│ │ Collector Agent │ │ ┌────────┴─────────┐ │ │ -│ │ │ │ │ Exporter │ │ │ -│ │ │ │ │ (e.g. Loki, │ │ │ -│ │ │ │ │ Splunk, AWSS3) │ │ │ -│ │ │ │ └──────────────────┘ │ │ -│ │ │ └─────────────────────────┘ │ -│ └───────────────────────────┘ │ -│ │ -└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -### Deployment Patterns - -ComplyBeacon is designed to be a flexible toolkit. Its components can be used in different combinations to fit a variety of operational needs. - -* **Full Pipeline**: The most common use case where `ProofWatch` emits events to the `Beacon` collector, which in turn uses `TruthBeam` and `Compass` to enrich and export logs to a final destination. -* **Integrating `TruthBeam`**: `TruthBeam` can be included in an existing OpenTelemetry Collector distribution, allowing you to add enrichment capabilities to your current observability pipeline. 
-* **Standalone `Compass`**: The `Compass` service can be deployed as an independent API, enabling it to be called by any application or a different enrichment processor within an existing OpenTelemetry or custom logging pipeline. - -## Component Analysis - -### 1. ProofWatch - -**Purpose**: An instrumentation library for collecting and emitting compliance evidence as OpenTelemetry log streams. It provides a standardized interface for tracking policy evaluation events and compliance evidence in real-time. - -**Key Responsibilities**: -* Converts compliance evidence data into standardized OpenTelemetry log records. -* Emits log records to the OpenTelemetry Collector using the OTLP (OpenTelemetry Protocol). -* Provides metrics and tracing for evidence collection and processing. - -`proofwatch` attributes defined [here](https://github.com/complytime/complytime-collector-components/blob/main/docs/attributes) - -_Example code snippet_ -```go -import ( - "context" - "log" - - "go.opentelemetry.io/otel/log" - "github.com/complytime/complybeacon/proofwatch" -) - -// Create a new ProofWatch instance -pw, err := proofwatch.NewProofWatch() -if err != nil { - log.Fatal(err) -} - -// Create evidence (example with GemaraEvidence) -evidence := proofwatch.GemaraEvidence{ - // ... populate evidence fields -} - -// Log evidence with default severity -err = pw.Log(ctx, evidence) -if err != nil { - return fmt.Errorf("error logging evidence: %w", err) -} - -// Or log with specific severity -err = pw.LogWithSeverity(ctx, evidence, olog.SeverityWarn) -``` - -### 2. Beacon Collector Distro - -**Purpose**: A minimal OpenTelemetry Collector distribution that acts as the runtime environment for the `complybeacon` evidence pipeline, specifically by hosting the `truthbeam` processor. - -**Key Responsibilities**: -* Receiving log records from sources like `proofwatch` -* Running the `truthbeam` log processor on each log record. -* Exporting the processed, enriched logs to a configured backend. 
- -### 3. TruthBeam - -**Purpose**: To enrich log records with compliance-related context by querying the `compass` service. This is the core logic that transforms a simple policy check into an actionable compliance event. - -**Key Responsibilities**: -* Maintains a local, in-memory cache of previously enriched data to reduce API calls and improve performance. -* Queries the Compass API for enrichment data based on attributes in the log record. -* Skips enrichment on API failures, tagging the log record with an enrichment_status: skipped attribute to enable graceful degradation. -* Adds the returned enrichment data as new attributes to the log record. - -### 4. Compass - -**Purpose**: A centralized lookup service that provides compliance context. It's the source of truth for mapping policies to standards and risk attributes. - -**Key Responsibilities**: -* Receiving an EnrichmentRequest from `truthbeam`. -* Performing a lookup based on the policy details. -* Returning an EnrichmentResponse with compliance attributes. diff --git a/content/docs/projects/complytime-collector-components/development.md b/content/docs/projects/complytime-collector-components/development.md deleted file mode 100644 index 14ca904..0000000 --- a/content/docs/projects/complytime-collector-components/development.md +++ /dev/null @@ -1,399 +0,0 @@ ---- -description: Developer guide for building and extending Collector Components. -title: Development -weight: 30 ---- - - -# ComplyBeacon Development Guide - -This guide provides comprehensive instructions for setting up, building, and testing the ComplyBeacon project. -It complements the [DESIGN.md](https://github.com/complytime/complytime-collector-components/blob/main/docs/DESIGN.md) document by focusing on the practical aspects of development. 
- - -* [ComplyBeacon Development Guide](#complybeacon-development-guide) - * [Prerequisites](#prerequisites) - * [Required Software](#required-software) - * [Development Environment Setup](#development-environment-setup) - * [1. Clone the Repository](#1-clone-the-repository) - * [2. Install podman-compose (if needed)](#2-install-podman-compose-if-needed) - * [3. Initialize Go Workspace](#3-initialize-go-workspace) - * [4. Install Dependencies](#4-install-dependencies) - * [5. Verify Installation](#5-verify-installation) - * [Project Structure](#project-structure) - * [Testing](#testing) - * [Running Tests](#running-tests) - * [Integration Testing](#integration-testing) - * [Component Development](#component-development) - * [1. ProofWatch Development](#1-proofwatch-development) - * [2. Compass Development](#2-compass-development) - * [3. TruthBeam Development](#3-truthbeam-development) - * [4. Beacon Distro Development](#4-beacon-distro-development) - * [Debugging and Troubleshooting](#debugging-and-troubleshooting) - * [Debugging Tools](#debugging-tools) - * [Code Generation](#code-generation) - * [1. API Code Generation](#1-api-code-generation) - * [2. OpenTelemetry Semantic Conventions](#2-opentelemetry-semantic-conventions) - * [3. Manual Code Generation](#3-manual-code-generation) - * [Deployment and Demo](#deployment-and-demo) - * [Local Development Demo](#local-development-demo) - * [Additional Resources](#additional-resources) - - -## Prerequisites - -### Required Software - -- **Go 1.24+**: The project uses Go 1.24.0 with toolchain 1.24.5 -- **Podman**: For containerized development and deployment -- **podman-compose**: For orchestrating multi-container development environments -- **Make**: For build automation -- **Git**: For version control -- **openssl** Cryptography toolkit - -## Development Environment Setup - -### 1. Clone the Repository - -```bash -git clone https://github.com/complytime/complybeacon.git -cd complybeacon -``` - -### 2. 
Install podman-compose (if needed) - -The project uses `podman-compose` for container orchestration. Install it if you don't have it: - -```bash -# Install podman-compose -pip install podman-compose - -# alternatively for Fedora: -dnf install podman-compose - -# Verify installation -podman-compose --version -``` - -### 3. Initialize Go Workspace - -The project uses Go workspaces to manage multiple modules: - -```bash -make workspace -``` - -This creates a `go.work` file that includes all project modules: -- `./compass` -- `./proofwatch` -- `./truthbeam` - -### 4. Install Dependencies - -Dependencies are managed per module. Install them for all modules: - -```bash -# Install dependencies for all modules -for module in compass proofwatch truthbeam; do - cd $module && go mod download && cd .. -done -``` - -### 5. Verify Installation - -```bash -# Run tests to verify everything works -make test - -# Build all binaries -make build -``` - -## Project Structure - -``` -complybeacon/ -├── api.yaml # OpenAPI specification for Compass service -├── compose.yaml # podman-compose configuration for demo environment -├── Makefile # Build automation -├── docs/ # Documentation -│ ├── DESIGN.md # Architecture and design documentation -│ ├── DEVELOPMENT.md # This file -│ └── attributes/ # Attribute documentation -├── model/ # OpenTelemetry semantic conventions -│ ├── attributes.yaml # Attribute definitions -│ └── entities.yaml # Entity definitions -├── compass/ # Compass service module -│ ├── cmd/compass/ # Main application -│ ├── api/ # Generated API code -│ ├── mapper/ # Enrichment mappers -│ └── service/ # Business logic -├── proofwatch/ # ProofWatch instrumentation library -│ ├── attributes.go # Attribute definitions -│ ├── evidence.go # Evidence types -│ └── proofwatch.go # Main library -├── truthbeam/ # TruthBeam processor module -│ ├── internal/ # Internal packages -│ ├── config.go # Configuration -│ └── processor.go # Main processor logic -├── beacon-distro/ # OpenTelemetry 
Collector distribution -│ ├── config.yaml # Collector configuration -│ └── Containerfile.collector # Container definition -├── hack/ # Development utilities -│ ├── demo/ # Demo configurations -│ ├── sampledata/ # Sample data for testing -│ └── self-signed-cert/ # self signed cert, testing/development purpose -└── bin/ # Built binaries (created by make build) -``` - -## Testing - -### Running Tests - -```bash -# Run all tests -make test - -# Run tests for specific module -cd compass && go test -v ./... -cd proofwatch && go test -v ./... -cd truthbeam && go test -v ./... -``` - -### Integration Testing - -The project includes integration tests using the demo environment: - -```bash -# Start the demo environment -make deploy - -# Test the pipeline -curl -X POST http://localhost:8088/eventsource/receiver \ - -H "Content-Type: application/json" \ - -d @hack/sampledata/evidence.json - -# Check logs in Grafana at http://localhost:3000 -# Check Compass API at http://localhost:8081/v1/enrich -``` - -## Component Development - -### 1. ProofWatch Development - -ProofWatch is an instrumentation library for emitting compliance evidence. - -**Key Files:** -- `proofwatch/proofwatch.go` - Main library interface -- `proofwatch/evidence.go` - Evidence type definition -- `proofwatch/attributes.go` - OpenTelemetry attributes - -**Development Workflow:** -```bash -cd proofwatch - -# Run tests -go test -v ./... - -# Check for linting issues -go vet ./... - -# Format code -go fmt ./... -``` - -### 2. Compass Development - -Compass is the enrichment service that provides compliance context. 
- -**Key Files:** -- `compass/cmd/compass/main.go` - Service entry point -- `compass/service/service.go` - Business logic -- `compass/mapper/` - Enrichment mappers -- `api.yaml` - OpenAPI specification - -**Development Workflow:** -```bash -cd compass - -# Run the service locally -go run ./cmd/compass --config hack/demo/config.yaml --catalog hack/sampledata/osps.yaml --port 8081 --skip-tls - -# Test the API -curl -X POST http://localhost:8081/v1/metadata \ - -H "Content-Type: application/json" \ - -d '{"policy": {"policyEngineName": "OPA", "policyRuleId": "deny-root-user"}}' -``` - -**Adding New Mappers:** -1. Create a new mapper in `compass/mapper/plugins/` -2. Implement the `Mapper` interface -3. Register the mapper in the factory -4. Add configuration options - -### 3. TruthBeam Development - -TruthBeam is an OpenTelemetry Collector processor for enriching logs. - -**Key Files:** -- `truthbeam/processor.go` - Main processor logic -- `truthbeam/config.go` - Configuration structures -- `truthbeam/factory.go` - Processor factory - -**Development Workflow:** -```bash -cd truthbeam - -# Run tests -go test -v ./... - -# Test with collector (requires beacon-distro) -cd ../beacon-distro -# Modify config to use local truthbeam -# Run collector with local processor -``` - -**Local development config** - -If you want locally test the TruthBeam, remember to change the [manifest.yaml](https://github.com/complytime/complytime-collector-components/blob/main/beacon-distro/manifest.yaml) - -Add replace directive at the end of [manifest.yaml](https://github.com/complytime/complytime-collector-components/blob/main/beacon-distro/manifest.yaml), to make sure collector use your `truthbeam` code. 
Default collector will use `- gomod: github.com/complytime/complybeacon/truthbeam main` - -For example: -```yaml -replaces: - - github.com/complytime/complybeacon/truthbeam => github.com/AlexXuan233/complybeacon/truthbeam 52e4a76ea0f72a7049e73e7a5d67d988116a3892 -``` -or -```yaml -replaces: - - github.com/complytime/complybeacon/truthbeam => github.com/AlexXuan233/complybeacon/truthbeam main -``` - -### 4. Beacon Distro Development - -The Beacon distribution is a custom OpenTelemetry Collector. - -**Key Files:** -- `beacon-distro/config.yaml` - Collector configuration -- `beacon-distro/Containerfile.collector` - Container definition - -**Development Workflow:** -```bash -cd beacon-distro - -# Build the collector image -podman build -f Containerfile.collector -t complybeacon-beacon-distro:latest . - -# Test with local configuration -podman run --rm -p 4317:4317 -p 8088:8088 \ - -v $(pwd)/config.yaml:/etc/otel-collector.yaml:Z \ - complybeacon-beacon-distro:latest -``` - -## Debugging and Troubleshooting - -### Debugging Tools - -```bash -# View container logs -podman-compose logs -f compass -podman-compose logs -f collector -``` - -## Code Generation - -The project uses several code generation tools: - -### 1. API Code Generation - -Generate Go code from OpenAPI specification: - -```bash -make api-codegen -``` - -This generates: -- `compass/api/types.gen.go` - Request/response types -- `compass/api/server.gen.go` - Server interfaces - -### 2. OpenTelemetry Semantic Conventions - -Generate documentation and Go code from semantic convention models: - -```bash -# Generate documentation -make weaver-docsgen - -# Generate Go code -make weaver-codegen - -# Validate models -make weaver-check -``` - -### 3. 
Manual Code Generation - -If you modify the OpenAPI spec or semantic conventions: - -```bash -# Update API spec -vim api.yaml - -# Regenerate API code -make api-codegen - -# Update semantic conventions -vim model/attributes.yaml -vim model/entities.yaml - -# Regenerate semantic convention code -make weaver-codegen -``` - -## Deployment and Demo - -### Local Development Demo - -The demo environment uses `podman-compose` to orchestrate multiple containers. Ensure you have `podman-compose` installed before proceeding. - -1. **Generate self-signed certificate** - -Since compass and truthbeam enabled TLS by default, first we need to generate self-signed certificate for testing/development - -```shell -make generate-self-signed-cert -``` - -2. **Start the full stack:** -```bash -make deploy -``` - -3. **Test the pipeline:** -```bash -curl -X POST http://localhost:8088/eventsource/receiver \ - -H "Content-Type: application/json" \ - -d @hack/sampledata/evidence.json -``` - -4. **View results:** -- Grafana: http://localhost:3000 - -5. **Stop the stack:** -```bash -make undeploy -``` - ---- - -## Additional Resources - -- [OpenTelemetry Documentation](https://opentelemetry.io/docs/) -- [Go Documentation](https://golang.org/doc/) -- [Podman Documentation](https://docs.podman.io/) -- [Project Design Document](https://github.com/complytime/complytime-collector-components/blob/main/docs/DESIGN.md) -- [Attribute Documentation](https://github.com/complytime/complytime-collector-components/blob/main/docs/attributes) -- [Containers Guide](https://github.com/complytime/community/blob/main/CONTAINERS_GUIDE.md) - -For questions or support, please open an issue in the GitHub repository. 
diff --git a/content/docs/projects/complytime-collector-components/integration/sync-evidence-hyperproof.md b/content/docs/projects/complytime-collector-components/integration/sync-evidence-hyperproof.md deleted file mode 100644 index 67a5f4e..0000000 --- a/content/docs/projects/complytime-collector-components/integration/sync-evidence-hyperproof.md +++ /dev/null @@ -1,163 +0,0 @@ ---- -description: Integration guide for syncing compliance evidence to Hyperproof. -title: Sync Evidence to Hyperproof -weight: 50 ---- - - -# Auto-Sync Evidence to Hyperproof - -## 1. Objective and Value -The purpose of this document is to detail the architecture and workflow for automatically syncing compliance evidence into [Hyperproof](https://hyperproof.io/). This process automates the "last mile" of the compliance journey: delivering collected, enriched, and verified evidence directly into the organisation's GRC (Governance, Risk, and Compliance) platform. - ---- - -### **Business Value** -Implementing this workflow closes the loop between technical operations and compliance auditing, achieving: - -* Continuous Compliance: Transforms evidence collection from a periodic, manual scramble into a continuous, automated flow. -* Audit Readiness: Ensures evidence is instantly available to auditors and stakeholders within [Hyperproof](https://hyperproof.io/). -* End-to-End Automation: Fully automates the pipeline from code check-in (or system event) to auditor review. - ---- - -## 2. Technical Architecture & Workflow -The automation pipeline uses an event-driven architecture hosted on [AWS](https://docs.aws.amazon.com/) to bridge [Complybeacon](https://github.com/complytime/complybeacon) and [Hyperproof](https://hyperproof.io/). - - - -### **The Step-by-Step Workflow** - -| Step | Component | Action | Details | -| :--- | :--- | :--- | :--- | -| Export | Complybeacon | Output | Complybeacon completes evidence collection and exports the finalized logs. 
| -| Ingestion | AWS S3 | Secure Storage | The evidence logs are deposited into the designated S3 Bucket. | -| Trigger | S3 Event | Event-Driven | The creation of a new object in S3 automatically triggers the linked AWS Lambda Function. | -| Processing | AWS Lambda | Transformation/Push | The function executes a Python script that retrieves the Hyperproof secrets from AWS SSM, authenticates via the Hyperproof API, and pushes the evidence data. | -| Verification | AWS / Hyperproof | Validation | Inspect CloudWatch Logs for successful execution. Then, check Hyperproof to verify the evidence appears in the expected location. | - ---- - -## 3. Preparation & Prerequisites -Before configuring the automation, the following components and credentials must be provisioned. - -### **3.1 Hyperproof Configuration** - -1. **Provision API Credentials:** Create an API client within Hyperproof to allow external access. - * *Path:* `Administrator -> Setting -> API Client` -2. **Record Credentials:** Securely note the `CLIENT_ID` and `CLIENT_SECRET`. - -### **3.2 AWS Infrastructure Setup** - -#### **A. IAM & [S3 Bucket](https://docs.aws.amazon.com/s3/?icmpid=docs_homepage_featuredsvcs) (Storage)** - -1. Create S3 Bucket: Provision a new AWS S3 bucket for evidence ingestion. Note the Bucket Name. -2. [Create IAM Policy](https://docs.hyperproof.io/cm/en/integrations/hp-amazon-s3): Create an IAM Policy granting write access to this specific S3 bucket (for Complybeacon). 
- - _Example Policy snippet_ - ``` - { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "VisualEditor0", - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": [ - "arn:aws:s3:::sw-s3-hyperproof/*", # Update the S3 bucket name - "arn:aws:s3:::sw-s3-hyperproof" # Update the S3 bucket name - ] - }, - { - "Sid": "VisualEditor1", - "Effect": "Allow", - "Action": [ - "s3:ListAllMyBuckets", - "s3:ListBucket" - ], - "Resource": "*" - }, - { - "Sid": "VisualEditor2", - "Effect": "Allow", - "Action": "s3:PutObject", - "Resource": "arn:aws:s3:::sw-s3-hyperproof/*" # Update the S3 bucket name - } - ] - } - ``` -3. [Create IAM User](https://docs.hyperproof.io/cm/en/integrations/hp-amazon-s3): Create an IAM User (for Complybeacon), attach the policy, and generate the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. - -#### **B. [Systems Manager](https://docs.aws.amazon.com/systems-manager/?icmpid=docs_homepage_mgmtgov) (Secrets Management)** - -Create new **`SecureString`** parameters in the AWS Systems Manager (SSM) Parameter Store to securely hold the Hyperproof credentials. - -* `/hyperproof/CLIENT_ID` -* `/hyperproof/CLIENT_SECRET` - -#### **C. [Lambda Function](https://docs.aws.amazon.com/lambda/?icmpid=docs_homepage_featuredsvcs)** - -1. **Create Function:** Initialise a new AWS Lambda function (using Python runtime). -2. **Configure Triggers:** Add an S3 trigger linking it to the bucket from step **3.2 A**, configured to fire ***only*** on `s3:ObjectCreated:Put` and `s3:ObjectCreated:Post` events(Very important). -3. **Configure IAM Execution Role:** - * Attach the managed policy `AmazonS3ReadOnlyAccess` (to allow Lambda to read the evidence logs). - * Create and attach an inline policy granting `ssm:GetParameter` and `kms:Decrypt` permission to read the specific SSM parameters (`/hyperproof/CLIENT_ID`, `/hyperproof/CLIENT_SECRET`). 
- - _Example Policy snippet_ - ```json - { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "VisualEditor0", - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": "arn:aws:s3:::alex-hyperproof-test/*" - } - ] - } - ``` - - ```json - { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "VisualEditor0", - "Effect": "Allow", - "Action": "kms:Decrypt", - "Resource": "*" - }, - { - "Sid": "VisualEditor1", - "Effect": "Allow", - "Action": "ssm:GetParameter", - "Resource": [ - "arn:aws:ssm:eu-north-1:725106756198:parameter/hyperproof/CLIENT_ID", - "arn:aws:ssm:eu-north-1:725106756198:parameter/hyperproof/CLIENT_SECRET" - ] - } - ] - } - ``` - -4. **Dependencies & Layers:** Create and attach a Lambda Layer containing the necessary Python libraries (`requests`). -5. **Set Environment Variables:** Configure the following (for the Python script to use): - * `CLIENT_ID`: `/hyperproof/CLIENT_ID` - * `CLIENT_SECRET`: `/hyperproof/CLIENT_SECRET` -6. **Deploy Code:** Deploy the actual [sync code](https://gitlab.cee.redhat.com/product-security/continuous-compliance/SyncEvidence2Hyperproof/-/blob/main/lambda_function.py?ref_type=heads) (which reads S3, retrieves secrets from SSM, and calls the Hyperproof API) into the Lambda Function editor. -7. **Setup timeout** Go to Configuration->General configuration, increase timeout value to a bigger value, for example 10s(default is 3). - ---- - -## 4. Execution -Once all prerequisites are complete, the pipeline is activated automatically: - -1. The Complybeacon exports the evidence log. -2. The evidence log is written to the configured S3 bucket. -3. The S3 write event immediately triggers the Lambda function. -4. The Lambda function executes, pushing the evidence log to Hyperproof. 
\ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6bad0ba --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/complytime/website + +go 1.25.8 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/layouts/_default/_markup/render-heading.html b/layouts/_default/_markup/render-heading.html new file mode 100644 index 0000000..e6dbff4 --- /dev/null +++ b/layouts/_default/_markup/render-heading.html @@ -0,0 +1,4 @@ + + {{- .Text -}} + # + diff --git a/layouts/_partials/main/edit-page.html b/layouts/_partials/main/edit-page.html new file mode 100644 index 0000000..437af16 --- /dev/null +++ b/layouts/_partials/main/edit-page.html @@ -0,0 +1,48 @@ +{{- /* Use per-page editURL (set by sync tool for upstream content) when available, + otherwise fall back to the default docsRepo-based URL. */ -}} + +{{ $url := "" }} + +{{ with .Params.editURL }} + {{ $url = . 
}} +{{ else }} + {{ $parts := slice site.Params.doks.docsRepo }} + + {{ if (eq site.Params.doks.repoHost "GitHub") }} + {{ $parts = $parts | append "blob" site.Params.doks.docsRepoBranch }} + {{ else if (eq site.Params.doks.repoHost "Gitea") }} + {{ $parts = $parts | append "_edit" site.Params.doks.docsRepoBranch }} + {{ else if (eq site.Params.doks.repoHost "GitLab") }} + {{ $parts = $parts | append "-/blob" site.Params.doks.docsRepoBranch }} + {{ else if (eq site.Params.doks.repoHost "Bitbucket") }} + {{ $parts = $parts | append "src" site.Params.doks.docsRepoBranch }} + {{ else if (eq site.Params.doks.repoHost "BitbucketServer") }} + {{ $parts = $parts | append "browse" site.Params.doks.docsRepoBranch }} + {{ end }} + + {{ if isset .Site.Params "docsreposubpath" }} + {{ if not (eq site.Params.doks.docsRepoSubPath "") }} + {{ $parts = $parts | append site.Params.doks.docsRepoSubPath }} + {{ end }} + {{ end }} + + {{ $filePath := replace .File.Path "\\" "/" }} + + {{ $lang := "" }} + {{ if site.Params.doks.multilingualMode }} + {{ $lang = .Lang }} + {{ end }} + + {{ $parts = $parts | append "content" $lang $filePath }} + + {{ $url = delimit $parts "/" }} +{{ end }} + + diff --git a/layouts/home.html b/layouts/home.html index 0c08d86..4ab7c50 100644 --- a/layouts/home.html +++ b/layouts/home.html @@ -110,56 +110,28 @@

Our Projects

A suite of tools designed to streamline compliance workflows from code to audit.

+ {{- $colors := dict "Go" "primary" "Python" "warning" "Shell" "secondary" "TypeScript" "info" -}} + {{- $projects := site.Data.projects | default slice -}} + {{- if $projects }} + {{- $projects = sort $projects "stars" "desc" -}} + {{- end }} diff --git a/layouts/shortcodes/project-cards.html b/layouts/shortcodes/project-cards.html new file mode 100644 index 0000000..d4b5d4b --- /dev/null +++ b/layouts/shortcodes/project-cards.html @@ -0,0 +1,31 @@ +{{- $colors := dict "Go" "primary" "Python" "warning" "Shell" "secondary" "TypeScript" "info" -}} +{{- $projects := site.Data.projects | default slice -}} +{{- if $projects }} +{{- $projects = sort $projects "stars" "desc" -}} +{{- $types := slice -}} +{{- range $projects -}} + {{- $types = $types | append .type -}} +{{- end -}} +{{- $types = $types | uniq -}} + +{{- range $type := $types }} +

{{ $type }}

+ +{{- end }} +{{- end }} diff --git a/package-lock.json b/package-lock.json index dd34d8e..b4cacaf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,13 +1,13 @@ { - "name": "doks", - "version": "1.8.1", + "name": "website", + "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "doks", - "version": "1.8.1", - "license": "MIT", + "name": "website", + "version": "0.1.0", + "license": "Apache-2.0", "dependencies": { "@tabler/icons": "^3.34.1", "@thulite/doks-core": "^1.8.3", @@ -2319,27 +2319,6 @@ } } }, - "node_modules/@isaacs/balanced-match": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz", - "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==", - "license": "MIT", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/@isaacs/brace-expansion": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.1.tgz", - "integrity": "sha512-WMz71T1JS624nWj2n2fnYAuPovhv7EUhk69R6i9dsVyzxt5eM3bjwvgk9L+APE1TRscGysAVMANkB0jh0LQZrQ==", - "license": "MIT", - "dependencies": { - "@isaacs/balanced-match": "^4.0.1" - }, - "engines": { - "node": "20 || >=22" - } - }, "node_modules/@isaacs/cliui": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-9.0.0.tgz", @@ -2522,9 +2501,9 @@ } }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.46.1.tgz", - "integrity": "sha512-oENme6QxtLCqjChRUUo3S6X8hjCXnWmJWnedD7VbGML5GUtaOtAyx+fEEXnBXVf0CBZApMQU0Idwi0FmyxzQhw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": 
"sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", "cpu": [ "arm" ], @@ -2536,9 +2515,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.46.1.tgz", - "integrity": "sha512-OikvNT3qYTl9+4qQ9Bpn6+XHM+ogtFadRLuT2EXiFQMiNkXFLQfNVppi5o28wvYdHL2s3fM0D/MZJ8UkNFZWsw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", "cpu": [ "arm64" ], @@ -2550,9 +2529,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.46.1.tgz", - "integrity": "sha512-EFYNNGij2WllnzljQDQnlFTXzSJw87cpAs4TVBAWLdkvic5Uh5tISrIL6NRcxoh/b2EFBG/TK8hgRrGx94zD4A==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", "cpu": [ "arm64" ], @@ -2564,9 +2543,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.46.1.tgz", - "integrity": "sha512-ZaNH06O1KeTug9WI2+GRBE5Ujt9kZw4a1+OIwnBHal92I8PxSsl5KpsrPvthRynkhMck4XPdvY0z26Cym/b7oA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", "cpu": [ "x64" ], @@ -2578,9 +2557,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.46.1", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.46.1.tgz", - "integrity": "sha512-n4SLVebZP8uUlJ2r04+g2U/xFeiQlw09Me5UFqny8HGbARl503LNH5CqFTb5U5jNxTouhRjai6qPT0CR5c/Iig==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", "cpu": [ "arm64" ], @@ -2592,9 +2571,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.46.1.tgz", - "integrity": "sha512-8vu9c02F16heTqpvo3yeiu7Vi1REDEC/yES/dIfq3tSXe6mLndiwvYr3AAvd1tMNUqE9yeGYa5w7PRbI5QUV+w==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", "cpu": [ "x64" ], @@ -2606,9 +2585,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.46.1.tgz", - "integrity": "sha512-K4ncpWl7sQuyp6rWiGUvb6Q18ba8mzM0rjWJ5JgYKlIXAau1db7hZnR0ldJvqKWWJDxqzSLwGUhA4jp+KqgDtQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", "cpu": [ "arm" ], @@ -2620,9 +2599,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.46.1.tgz", - "integrity": "sha512-YykPnXsjUjmXE6j6k2QBBGAn1YsJUix7pYaPLK3RVE0bQL2jfdbfykPxfF8AgBlqtYbfEnYHmLXNa6QETjdOjQ==", + 
"version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", "cpu": [ "arm" ], @@ -2634,9 +2613,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.46.1.tgz", - "integrity": "sha512-kKvqBGbZ8i9pCGW3a1FH3HNIVg49dXXTsChGFsHGXQaVJPLA4f/O+XmTxfklhccxdF5FefUn2hvkoGJH0ScWOA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", "cpu": [ "arm64" ], @@ -2648,9 +2627,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.46.1.tgz", - "integrity": "sha512-zzX5nTw1N1plmqC9RGC9vZHFuiM7ZP7oSWQGqpbmfjK7p947D518cVK1/MQudsBdcD84t6k70WNczJOct6+hdg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", "cpu": [ "arm64" ], @@ -2661,10 +2640,24 @@ "linux" ] }, - "node_modules/@rollup/rollup-linux-loongarch64-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.46.1.tgz", - "integrity": "sha512-O8CwgSBo6ewPpktFfSDgB6SJN9XDcPSvuwxfejiddbIC/hn9Tg6Ai0f0eYDf3XvB/+PIWzOQL+7+TZoB8p9Yuw==", + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", "cpu": [ "loong64" ], @@ -2676,9 +2669,23 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.46.1.tgz", - "integrity": "sha512-JnCfFVEKeq6G3h3z8e60kAp8Rd7QVnWCtPm7cxx+5OtP80g/3nmPtfdCXbVl063e3KsRnGSKDHUQMydmzc/wBA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", "cpu": [ "ppc64" ], @@ -2690,9 +2697,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.46.1.tgz", - "integrity": "sha512-dVxuDqS237eQXkbYzQQfdf/njgeNw6LZuVyEdUaWwRpKHhsLI+y4H/NJV8xJGU19vnOJCVwaBFgr936FHOnJsQ==", + 
"version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", "cpu": [ "riscv64" ], @@ -2704,9 +2711,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.46.1.tgz", - "integrity": "sha512-CvvgNl2hrZrTR9jXK1ye0Go0HQRT6ohQdDfWR47/KFKiLd5oN5T14jRdUVGF4tnsN8y9oSfMOqH6RuHh+ck8+w==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", "cpu": [ "riscv64" ], @@ -2718,9 +2725,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.46.1.tgz", - "integrity": "sha512-x7ANt2VOg2565oGHJ6rIuuAon+A8sfe1IeUx25IKqi49OjSr/K3awoNqr9gCwGEJo9OuXlOn+H2p1VJKx1psxA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", "cpu": [ "s390x" ], @@ -2732,9 +2739,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.46.1.tgz", - "integrity": "sha512-9OADZYryz/7E8/qt0vnaHQgmia2Y0wrjSSn1V/uL+zw/i7NUhxbX4cHXdEQ7dnJgzYDS81d8+tf6nbIdRFZQoQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": 
"sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", "cpu": [ "x64" ], @@ -2746,9 +2753,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.46.1.tgz", - "integrity": "sha512-NuvSCbXEKY+NGWHyivzbjSVJi68Xfq1VnIvGmsuXs6TCtveeoDRKutI5vf2ntmNnVq64Q4zInet0UDQ+yMB6tA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", "cpu": [ "x64" ], @@ -2759,10 +2766,38 @@ "linux" ] }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.46.1.tgz", - "integrity": "sha512-mWz+6FSRb82xuUMMV1X3NGiaPFqbLN9aIueHleTZCc46cJvwTlvIh7reQLk4p97dv0nddyewBhwzryBHH7wtPw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + 
"integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", "cpu": [ "arm64" ], @@ -2774,9 +2809,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.46.1.tgz", - "integrity": "sha512-7Thzy9TMXDw9AU4f4vsLNBxh7/VOKuXi73VH3d/kHGr0tZ3x/ewgL9uC7ojUKmH1/zvmZe2tLapYcZllk3SO8Q==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", "cpu": [ "ia32" ], @@ -2787,10 +2822,24 @@ "win32" ] }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.46.1.tgz", - "integrity": "sha512-7GVB4luhFmGUNXXJhH2jJwZCFB3pIOixv2E3s17GQHBFUOQaISlt7aGcQgqvCaDSxTZJUzlK/QJ1FN8S94MrzQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", "cpu": [ "x64" ], @@ -4222,9 +4271,9 @@ } }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": 
"3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "license": "ISC", "dependencies": { "brace-expansion": "^1.1.7" @@ -4798,6 +4847,27 @@ "scss-parser": "1.0.3" } }, + "node_modules/purgecss/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/purgecss/node_modules/brace-expansion": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", + "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, "node_modules/purgecss/node_modules/commander": { "version": "12.1.0", "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", @@ -4832,15 +4902,15 @@ } }, "node_modules/purgecss/node_modules/minimatch": { - "version": "10.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.2.tgz", - "integrity": "sha512-fu656aJ0n2kcXwsnwnv9g24tkU5uSmOlTjd6WyyaKm2Z+h1qmY6bAjrcaIxF/BslFqbZ8UBtbJi7KgQOZD2PTw==", + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", + "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", "license": "BlueOak-1.0.0", "dependencies": { - "@isaacs/brace-expansion": "^5.0.1" + "brace-expansion": "^5.0.2" }, "engines": { - "node": "20 || >=22" + "node": "18 || 20 || >=22" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -5095,9 +5165,9 @@ } }, 
"node_modules/rollup": { - "version": "4.46.1", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.46.1.tgz", - "integrity": "sha512-33xGNBsDJAkzt0PvninskHlWnTIPgDtTwhg0U38CUoNP/7H6wI2Cz6dUeoNPbjdTdsYTGuiFFASuUOWovH0SyQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", + "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", "dev": true, "license": "MIT", "dependencies": { @@ -5111,26 +5181,31 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.46.1", - "@rollup/rollup-android-arm64": "4.46.1", - "@rollup/rollup-darwin-arm64": "4.46.1", - "@rollup/rollup-darwin-x64": "4.46.1", - "@rollup/rollup-freebsd-arm64": "4.46.1", - "@rollup/rollup-freebsd-x64": "4.46.1", - "@rollup/rollup-linux-arm-gnueabihf": "4.46.1", - "@rollup/rollup-linux-arm-musleabihf": "4.46.1", - "@rollup/rollup-linux-arm64-gnu": "4.46.1", - "@rollup/rollup-linux-arm64-musl": "4.46.1", - "@rollup/rollup-linux-loongarch64-gnu": "4.46.1", - "@rollup/rollup-linux-ppc64-gnu": "4.46.1", - "@rollup/rollup-linux-riscv64-gnu": "4.46.1", - "@rollup/rollup-linux-riscv64-musl": "4.46.1", - "@rollup/rollup-linux-s390x-gnu": "4.46.1", - "@rollup/rollup-linux-x64-gnu": "4.46.1", - "@rollup/rollup-linux-x64-musl": "4.46.1", - "@rollup/rollup-win32-arm64-msvc": "4.46.1", - "@rollup/rollup-win32-ia32-msvc": "4.46.1", - "@rollup/rollup-win32-x64-msvc": "4.46.1", + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + "@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + 
"@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + "@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" } }, diff --git a/specs/006-go-sync-tool/research.md b/specs/006-go-sync-tool/research.md new file mode 100644 index 0000000..3679d8f --- /dev/null +++ b/specs/006-go-sync-tool/research.md @@ -0,0 +1,114 @@ +# Research: Go Content Sync Tool + +**Date**: 2026-03-11 +**Spec**: [specs/006-go-sync-tool/spec.md](/specs/006-go-sync-tool/spec.md) + +## R1: Gitignore Patterns for Generated Content + +**Decision**: Use directory-level gitignore patterns with `!` negation for hand-maintained files. + +**Rationale**: The sync tool generates content at two paths: +- `content/docs/projects/{repo}/_index.md` (per-repo project pages) +- `data/projects.json` (landing page cards) + +The hand-maintained `content/docs/projects/_index.md` must remain committed. The gitignore pattern `content/docs/projects/*/` excludes all subdirectories while preserving the section-level `_index.md` file. + +**Alternatives considered**: +- Ignoring by filename pattern (e.g., `*/_index.md`) — too broad, would catch hand-maintained files. +- Committing generated content — rejected per Constitution XIII. + +**Current `.gitignore` state**: Updated — `content/docs/projects/*/` and `data/projects.json` patterns are in place. 
+ +## R2: Hugo Template Integration for `data/projects.json` + +**Decision**: The existing `layouts/home.html` already iterates over `data/projects.json` using Hugo's data template mechanism (`site.Data.projects`). No template changes needed for landing page cards. + +**Rationale**: Verified by reading `layouts/home.html` in the current repo — it contains a projects section that reads from `site.Data.projects`. The `ProjectCard` JSON structure (name, language, type, description, url, repo, stars) matches what the template expects. + +**Alternatives considered**: None — the template is already compatible. + +## R3: Docs Sidebar Integration + +**Decision**: The existing `layouts/docs/list.html` and `config/_default/menus/menus.en.toml` already provide sidebar navigation for docs pages. Each synced repo produces a section index (`_index.md`, frontmatter only) and an overview page (`overview.md`, README content). The `_index.md` makes the repo a Hugo section so child pages appear in the sidebar. The `overview.md` is the first navigable child page (weight 1). + +**Rationale**: Separating the section index from the README content enables the Doks sidebar to render the repo as a collapsible section heading with child pages underneath (overview + doc sub-pages). The `menus.en.toml` already has a `[[docs]]` entry for Projects pointing to `/docs/projects/`. + +**Alternatives considered**: +- Single `_index.md` with README body — rejected because Doks renders `_index.md` body inline at the section level, preventing collapsible sidebar sections with separate child pages. +- Custom sidebar template — unnecessary, Hugo's built-in section discovery handles it. + +### R3a: Sidebar Collapsing via Hugo Cascade + +**Decision**: Use Hugo's `cascade` frontmatter in `content/docs/projects/_index.md` to push `sidebar.collapsed: true` to repo-level section pages, rather than stamping it into each generated `_index.md`. 
+ +**Implementation**: Added to `content/docs/projects/_index.md` frontmatter: +```yaml +cascade: + - sidebar: + collapsed: true + _target: + kind: section + path: "{/docs/projects/*}" +``` + +**Rationale**: The Doks template (`render-section-menu.html` line 89) reads `$node.Page.Params.sidebar.collapsed`. Hugo cascade makes cascaded values accessible through `.Params`, so no template changes are needed. The `_target` uses `kind: section` + single `*` glob to match only repo-level sections (e.g., `complyctl`, `complyscribe`) but not their sub-folders (e.g., `complyctl/man`). + +**Why cascade over sync tool frontmatter**: +- Sidebar behavior is defined once in the content hierarchy, not repeated per-page +- The sync tool stays focused on content, not UI concerns +- New repos automatically inherit collapse behavior without sync tool changes +- Sub-folders remain expanded by design (they don't match the cascade target) + +**Alternatives considered**: +- Stamping `sidebar.collapsed: true` in each generated `_index.md` via `buildSectionIndex` — rejected because it couples UI behavior to the sync tool, violates separation of concerns, and requires sync tool changes when sidebar behavior needs to change. + +## R4: Deploy Workflow Adaptation + +**Decision**: Update `.github/workflows/deploy-gh-pages.yml` to add a sync step before Hugo build: +``` +go run ./cmd/sync-content --org complytime --config sync-config.yaml --lock .content-lock.json --write +``` + +**Rationale**: The current deploy workflow (`deploy-gh-pages.yml`) does `npm ci` then `hugo --minify --gc`. The sync step must run after Go is available and before Hugo builds. Go setup can use `actions/setup-go@v5`. + +**Current workflow state**: Updated — `deploy-gh-pages.yml` includes Go setup, sync step with `--lock .content-lock.json --write`, `GITHUB_TOKEN`, push-to-main trigger, and `workflow_dispatch`. 
The original daily cron was removed in favour of the PR-gated content approval model (Constitution XV v1.3.0): a separate `sync-content-check.yml` workflow runs weekly to detect upstream changes and opens a PR to update `.content-lock.json`. + +**Alternatives considered**: +- Separate sync and build workflows — rejected because the deploy must always use fresh synced content. A single pipeline ensures consistency. +- Daily cron deploy — rejected because it would silently propagate broken upstream content. PR-gated review is safer. + +## R5: CI Workflow for PR Validation + +**Decision**: Add or update a CI workflow that runs on pull requests with: `go vet`, `gofmt` check, `go test -race`, sync dry-run, and full Hugo build. + +**Rationale**: Constitution X (Go Code Quality) requires vet/fmt checks. Constitution XII (Dry-Run by Default) means CI can safely preview sync output. The existing `sync-content.yml` only handles the sync step — a separate CI workflow is needed for PR validation. + +**Alternatives considered**: +- Combining CI and deploy into one workflow — rejected for clarity and to avoid deploy-specific steps running on PRs. + +## R6: Unit Testing Strategy + +**Decision**: Write tests in `cmd/sync-content/*_test.go` using `net/http/httptest` to mock the GitHub API. Key test areas: +1. `loadConfig` — valid YAML, malformed YAML, missing file, default values +2. `injectFrontmatter` — prepend new, replace existing, empty content +3. `stripBadges` — badge lines removed, inline badges preserved +4. `shiftHeadings` — all headings bumped down one level (H1→H2, H2→H3, …) +5. `rewriteRelativeLinks` — relative→absolute conversion, absolute URLs preserved +6. `buildSectionIndex` / `buildOverviewPage` — frontmatter schema, deterministic output +7. `processRepo` integration — mock API server, verify page + card output +8. `syncConfigSource` — transforms applied, dry-run respected + +**Rationale**: The test-website has `main_test.go` with similar tests. 
These can be ported and adapted. HTTP test server avoids real API calls. + +**Alternatives considered**: +- Interface-based mocking (inject mock HTTP client) — adds abstraction for no benefit in a single-file tool. `httptest.Server` is simpler and more Go-idiomatic. + +## R7: `data/` Directory Bootstrap + +**Decision**: The `data/` directory must exist for Hugo to find `projects.json`. The sync tool creates it via `os.MkdirAll` when writing `data/projects.json`. For a fresh clone without running the sync tool, Hugo handles the missing `data/` gracefully — `site.Data.projects` returns nil, and the template renders zero cards. + +**Rationale**: Verified — Hugo does not error on missing data files; the template's `range` simply iterates zero items. No empty placeholder file needed. + +**Alternatives considered**: +- Committing an empty `data/projects.json` (`[]`) — rejected per Constitution XIII. +- Adding a `.gitkeep` in `data/` — unnecessary since Hugo handles the absence gracefully. diff --git a/specs/006-go-sync-tool/spec.md b/specs/006-go-sync-tool/spec.md new file mode 100644 index 0000000..1a91227 --- /dev/null +++ b/specs/006-go-sync-tool/spec.md @@ -0,0 +1,239 @@ +# Feature Specification: Go Content Sync Tool + +**Feature Branch**: `006-go-sync-tool` +**Phase**: 2 (Content Infrastructure) + +## Overview + +The ComplyTime website (`complytime.dev`) documents a growing ecosystem of open-source compliance tools hosted across multiple repositories in the `complytime` GitHub organization. Before this feature, project documentation was manually copied into the site — error-prone, inconsistent, and unable to scale as new repos were added. 
+ +This feature replaces that workflow with a Go CLI tool (`cmd/sync-content/`, ~2,100 lines across 10 source files in `package main`) that derives the set of eligible repositories from the org's governance registry (`peribolos.yaml` in the `.github` repo), fetches their README content and per-repo metadata via the GitHub REST API, applies Markdown transforms (heading level shifting, Title Case normalisation with acronym awareness and ALL CAPS normalisation, badge stripping, relative link rewriting), and generates Hugo-compatible pages and landing page card data. A declarative config overlay (`sync-config.yaml`) provides precision control for repos needing custom documentation layouts. + +**Dependencies**: Go 1.25+, `gopkg.in/yaml.v3` (sole third-party Go dep), Hugo 0.155.1 extended, Node.js 22. + +## Scope + +### In Scope + +> IDs are grouped by domain (001–018: core, 030–031: detection, 040–041: site integration, 070: content approval). Gaps between groups are intentional. + +| ID | Capability | +|----|-----------| +| IS-001 | Governance-driven repo discovery: fetch `peribolos.yaml` from `{org}/.github` repo, parse `orgs.{org}.repos` map as authoritative repo list, enrich with GitHub API metadata (stars, language, topics) per repo | +| IS-002 | README fetch with base64 decoding and SHA tracking | +| IS-003 | Per-repo page generation: section index (`_index.md`, frontmatter only, with `formatRepoTitle` for `title` and raw repo name as `linkTitle` for sidebar; ALL CAPS repo/file names normalised to Title Case) + overview page (`overview.md`, README content) | +| IS-004 | Landing page card generation (`data/projects.json`) with type derivation from topics | +| IS-005 | Config-driven file sync with transforms (`inject_frontmatter`, `rewrite_links`, `strip_badges`); heading shift and Title Case applied unconditionally to all synced content | +| IS-006 | Concurrent processing with bounded worker pool (`--workers`) | +| IS-007 | Dry-run by default; `--write` flag 
required for disk I/O | +| IS-008 | Markdown transforms: `stripLeadingH1` (removes leading H1 — title already in frontmatter), `shiftHeadings` (H1→H2, H2→H3, …), `titleCaseHeadings` (acronym-aware Title Case for in-page headings and TOC; normalises ALL CAPS words to Title Case while preserving known acronyms from the `knownAcronyms` map in `hugo.go` — ~30 domain terms; maintainers add entries as new projects introduce terminology), `stripBadges`, `rewriteRelativeLinks` | +| IS-009 | Repo filtering: `--include`/`--exclude` lists (peribolos is the governance gate; no API metadata filtering) | +| IS-012 | Sync manifest (`.sync-manifest.json`) for orphan file tracking | +| IS-014 | Doc page auto-sync from `discovery.scan_paths` directories | +| IS-016 | Single-repo mode (`--repo`): sync only one repository (validated against peribolos) | +| IS-017 | Summary file generation (`--summary report.md`) | +| IS-018 | GitHub CI outputs: `GITHUB_OUTPUT` variables and `GITHUB_STEP_SUMMARY` | +| IS-030 | Two-tier SHA-based change detection (branch SHA + README SHA) | +| IS-031 | Stale content cleanup via manifest diff (`cleanOrphanedFiles`); legacy directory-scan fallback removed | +| IS-040 | Dynamic landing page project cards from `data/projects.json` | +| IS-041 | Docs sidebar with collapsed repo-level sections via Hugo cascade | +| IS-042 | Hugo render heading hook (`render-heading.html`): adds anchor `id`, clickable `#` link, and `heading` CSS class to all headings site-wide | +| IS-070 | Content lockfile (`.content-lock.json`) for SHA-pinned content approval | + +### Out of Scope + +- `.specify/` artifact sync (fetching upstream `constitution.md`, `spec.md`, `plan.md` into site) — deferred to a future feature +- Private repository access +- GitHub Enterprise / custom API URL +- Log level control (`--verbose` / `--quiet`) +- Config schema versioning + +### Edge Cases (Peribolos Integration) + +| Case | Expected Behavior | +|------|-------------------| +| Repo in peribolos 
but deleted on GitHub | API metadata fetch returns 404; log warning, skip repo, continue | +| `.github` repo missing or peribolos.yaml absent | Fatal error — log and exit non-zero | +| `--org` flag value doesn't match peribolos `orgs` key | Fatal error — log mismatch and exit non-zero | +| `--repo` flag used (single-repo mode) | Validated against peribolos — repo must exist in governance registry; metadata fetched from API | + +## User Stories + +### US1: Safe Local Preview (Priority: P1) — MVP + +**As a** contributor, **I want to** clone the repo, run the sync tool, and preview the full site locally, **so that** I can verify documentation changes without risk. + +**Acceptance Scenarios**: +- **US1-SC1**: Running without `--write` creates zero files. Tool logs intended actions. +- **US1-SC2**: Running with `--write` generates: (a) section indexes at `content/docs/projects/{repo}/_index.md` (frontmatter only), (b) overview pages at `content/docs/projects/{repo}/overview.md` (README content), (c) doc sub-pages from `discovery.scan_paths`, (d) `data/projects.json`, (e) `.sync-manifest.json`. +- **US1-SC3**: `hugo server` after sync produces zero build errors. Pages accessible at `/docs/projects/`. + +### US2: Governance-Driven Discovery (Priority: P1) + +**As a** site maintainer, **I want** repos declared in the org's governance registry to automatically appear on the website, **so that** the site reflects the org's official repo list without ad-hoc API discovery. + +**Acceptance Scenarios**: +- **US2-SC1**: Repos listed in `peribolos.yaml` (and NOT in `sync-config.yaml`) produce: (a) `_index.md` with frontmatter (`title` via `formatRepoTitle`, `linkTitle` with raw repo name, `description`, `params.language`, `params.stars`, `params.source_sha`, `params.readme_sha`, `params.seo.*`) and no body, (b) `overview.md` with transformed README content (headings shifted and Title Cased). 
+- **US2-SC2**: `data/projects.json` contains a `ProjectCard` for every eligible repo from peribolos, sorted alphabetically, with fields `name`, `language`, `type`, `description`, `url`, `repo`, `stars`. +- **US2-SC3**: Repos present on GitHub but NOT in `peribolos.yaml` are excluded from sync (governance registry is authoritative). +- **US2-SC4**: If `peribolos.yaml` cannot be fetched (e.g., `.github` repo missing or network error), the tool logs an error and exits non-zero rather than silently falling back to API listing. + +> **Note**: When `--lock` is active (production deploys), new repos not yet in `.content-lock.json` are skipped until the next content sync check PR (US7) adds them to the lockfile and is merged. The approval gate controls when they reach production. + +### US3: Config-Driven Precision Sync (Priority: P1) + +**As a** documentation lead, **I want** precise control over specific files' destinations, frontmatter, and transforms, **so that** key projects have customized documentation layouts. + +**Acceptance Scenarios**: +- **US3-SC1**: For repos with `skip_org_sync: true`, no auto-generated section index or overview page exists, BUT the repo's `ProjectCard` is in `data/projects.json`. +- **US3-SC2**: Config-declared files appear at their `dest` paths with correct transforms applied. + +### US4: Change Detection and Stale Cleanup (Priority: P2) + +**As a** CI pipeline, **I want** the sync tool to skip unchanged repos and clean up stale content, **so that** builds are fast and the site stays clean. + +**Acceptance Scenarios**: +- **US4-SC1**: On a second consecutive run, unchanged repos show "unchanged" in log output with zero disk writes. +- **US4-SC2**: When a repo is removed from the org, all generated files (section index, overview, doc sub-pages, entire directory) are cleaned up. 
+ +### US5: CI/CD Pipeline Integration (Priority: P2) + +**As a** DevOps engineer, **I want** the sync tool to run automatically in GitHub Actions, **so that** production deploys always use reviewed, approved content. + +**Acceptance Scenarios**: +- **US5-SC1**: Deploy workflow includes Go setup, sync step with `GITHUB_TOKEN` and `--lock`, runs before Hugo build. Content is fetched at approved SHAs from `.content-lock.json`. +- **US5-SC2**: CI workflow validates PRs with `go test -race`, content sync (with `--lock`), and Hugo build. Deploy workflow additionally runs `go vet` and `gofmt` checks. +- **US5-SC3**: `GITHUB_OUTPUT` contains `has_changes`, `changed_count`, `error_count`. `GITHUB_STEP_SUMMARY` contains a markdown summary. CI deploys proceed even with non-fatal warnings. + +### US6: Concurrent Processing with Race Safety (Priority: P3) + +**As a** developer, **I want** the tool to process repos concurrently and pass race detection, **so that** processing is fast and correct. + +**Acceptance Scenarios**: +- **US6-SC1**: `go test -race ./cmd/sync-content/...` passes with zero data race warnings. +- **US6-SC2**: Unit tests cover all pure functions; integration tests verify end-to-end processing with mock API. + +### US7: Content Approval Gate (Priority: P2) + +**As a** site maintainer, **I want** upstream documentation changes to require human review before reaching production, **so that** broken or undesirable content never deploys automatically. + +**Acceptance Scenarios**: +- **US7-SC1**: A committed `.content-lock.json` pins each repo to an approved branch SHA. The deploy workflow fetches content at those locked SHAs — not HEAD. +- **US7-SC2**: A weekly check workflow detects upstream changes, updates `.content-lock.json`, and opens a PR. No content change reaches production without a merged PR. +- **US7-SC3**: Running with `--lock` and a repo not in the lockfile skips that repo (unapproved content is not fetched). 
+- **US7-SC4**: Running with `--lock --update-lock` writes current upstream SHAs to the lockfile for all discovered repos. + +## CLI Interface + +| Flag | Default | Description | +|------|---------|-------------| +| `--org` | `complytime` | GitHub organization — used to locate `peribolos.yaml` in `{org}/.github` and as the `orgs` key for repo extraction | +| `--token` | `$GITHUB_TOKEN` | GitHub API token (or set env var) | +| `--config` | (none) | Path to `sync-config.yaml` for config-driven file syncs | +| `--write` | `false` | Required to write files to disk (default: dry-run) | +| `--output` | `.` | Hugo site root directory | +| `--workers` | `5` | Max concurrent repo processing goroutines | +| `--timeout` | `3m` | Overall timeout for all API operations | +| `--include` | (all) | Comma-separated repo allowlist | +| `--exclude` | (see config) | Comma-separated repo names to skip | +| `--repo` | (none) | Sync only this repo (e.g., `complytime/complyctl`); validated against peribolos | +| `--summary` | (none) | Write markdown change summary to this file | +| `--lock` | (none) | Path to `.content-lock.json` for content approval gating | +| `--update-lock` | `false` | Write current upstream SHAs to the lockfile (requires `--lock`) | + +## Output Structure + +```text +content/docs/projects/ +├── _index.md # Hand-maintained section index (committed) +└── {repo}/ # Generated per-repo content (gitignored) + ├── _index.md # Section index — frontmatter only, no body + ├── overview.md # README content as child page (weight: 1) + └── {doc}.md # Doc pages from discovery.scan_paths + +data/ +└── projects.json # Landing page project cards (gitignored) + +layouts/_default/_markup/ +└── render-heading.html # Hugo render hook — anchor links and heading class (committed) + +.sync-manifest.json # Written file manifest for orphan cleanup (gitignored) +.content-lock.json # Approved upstream SHAs per repo (committed) +``` + +## Non-Functional Requirements + +| ID | Requirement | Target 
| +|----|------------|--------| +| NFR-001 | Full org sync completes within timeout | < 60s with token | +| NFR-002 | Hugo build time with generated content | < 2s | +| NFR-003 | All logging via `log/slog` with structured fields | — | +| NFR-004 | SPDX license headers on all Go source files | — | +| NFR-005 | All code in `package main` within `cmd/sync-content/`; no unnecessary packages or abstractions | — | +| NFR-006 | Only permitted third-party dep: `gopkg.in/yaml.v3` | — | +| NFR-007 | Generated content gitignored, not committed | — | +| NFR-008 | Idempotent runs: same input produces same output | — | + +## Security Requirements + +| ID | Requirement | Task | +|----|------------|------| +| SEC-001 | Path traversal prevention: all write paths validated under `--output` directory | T028 | +| SEC-002 | Bounded error response body reads (4KB max) to prevent memory exhaustion | T031 | +| SEC-003 | URL path escaping for all API URL construction to prevent injection | T032 | + +## Inherited Capabilities + +The following capabilities were ported from the test-website reference implementation and are functional: + +- **Two-tier SHA-based change detection**: Branch SHA (`params.source_sha`) for fast pre-filtering; README SHA (`params.readme_sha`) for content-level accuracy +- **Single-repo filtering** (`--repo`): Process one repo (validated against peribolos governance registry) +- **Doc page auto-sync**: Syncs Markdown files from `discovery.scan_paths` directories +- **Context cancellation**: `--timeout` flag with context propagation; retry sleep respects cancellation (T029) +- **CI integration outputs**: Writes `GITHUB_OUTPUT` variables and `GITHUB_STEP_SUMMARY` for GitHub Actions; deploys proceed even with non-fatal warnings +- **Content approval gate** (`--lock`): SHA-pinned lockfile gates deployments to reviewed content; weekly check workflow proposes updates via PR + +## Success Criteria + +All criteria must pass before feature 006 merges to `main`. 
+ +| ID | Criterion | Verification | +|----|----------|--------------| +| SC-001 | `go.mod` exists and `go mod verify` passes | `go mod verify` | +| SC-002 | `cmd/sync-content/` compiles without errors | `go build ./cmd/sync-content` | +| SC-003 | Dry-run produces zero files; write mode produces correct output structure | T003, T004 | +| SC-004 | Hugo builds with zero errors after sync | T005 | +| SC-005 | Auto-discovered repos have section index + overview + card | T006, T007 | +| SC-006 | Config overlay applies transforms at declared dest paths | T008, T009 (deferred until sources declared; code paths covered by unit tests) | +| SC-007 | Change detection skips unchanged repos; stale cleanup removes all files | T010 | +| SC-008 | Unit and integration tests pass | T015, T016 | +| SC-009 | `go vet` and `gofmt` pass with zero issues | T019 | +| SC-010 | CI workflow validates PRs with test, sync, build; deploy workflow adds vet/gofmt | T014 | +| SC-011 | Path traversal prevention rejects paths escaping `--output` directory | T028, T037 | +| SC-012 | Context-aware retry sleep respects cancellation promptly | T029, T037 | +| SC-013 | Stale cleanup removes all generated files (overview.md, doc sub-pages), not just `_index.md` | T030, T037 | +| SC-014 | `--lock` gates content to approved SHAs; unapproved repos are skipped | `lock_test.go`, `sync_test.go` (`TestProcessRepo_LockedSHA`) | +| SC-015 | `--update-lock` writes current upstream SHAs to lockfile | `lock_test.go` (`TestWriteLock`, `TestWriteLock_DeterministicOrder`) | +| SC-016 | Weekly check workflow creates/updates a PR with lockfile changes | `sync-content-check.yml` manual dispatch | + +## Merge Readiness Gate + +All 16 success criteria (SC-001 through SC-016) MUST pass before merging feature 006 to `main`. SC-006 is deferred (blocked on config sources being declared) but its code paths are covered by unit tests (`TestSyncConfigSource`, `TestProcessRepo`). 
SC-016 requires a manual `workflow_dispatch` run of `sync-content-check.yml` after merge.
+
+## Appendix: Legacy ID Cross-Reference
+
+The In Scope table above uses consolidated IDs. Earlier development phases used a more granular implementation status table with additional IDs. Tasks in `tasks.md` reference some of these legacy IDs. This table maps them to their current equivalents for traceability.
+
+| Legacy ID | Current Mapping | Context |
+|-----------|----------------|---------|
+| IS-010 | NFR-007 | Gitignore patterns for generated repo pages |
+| IS-011 | NFR-007 | Gitignore patterns for landing page cards |
+| IS-032 | SC-008 | Unit test requirement |
+| IS-050 | Constitution III | Remove hand-maintained committed project docs |
+| IS-051 | IS-018 | CI integration outputs (GITHUB_OUTPUT, step summary) |
+| IS-052 | Constitution III, IV | Constitution memory file sync to v1.5.0 |
+| IS-060, IS-062–IS-065 | — | Implementation status tracking items (historical; used during T021 final sweep) |
+| IS-061 | Inherited Capabilities | Context cancellation in retry sleep (referenced in spec Inherited Capabilities section) |
+| IS-071 | IS-070 | Content lockfile — `readLock`/`writeLock` implementation |
+| IS-072 | IS-070 | Content lockfile — `ref` parameter threading through API methods |
+| IS-073 | IS-001 | Governance-driven discovery via `peribolos.yaml` (Constitution v1.5.0 update) |
diff --git a/sync-config.yaml b/sync-config.yaml
new file mode 100644
index 0000000..0503cd7
--- /dev/null
+++ b/sync-config.yaml
@@ -0,0 +1,43 @@
+# sync-config.yaml — declarative file sync manifest
+#
+# This config layers precise file-level syncs on top of the org-wide scan.
+# Run with: go run ./cmd/sync-content --org complytime --config sync-config.yaml --write
+#
+# See specs/006-go-sync-tool/spec.md for full documentation. 
+# +# ───────────────────────────────────────────────────────────────────── +# Hybrid mode: +# +# The org scan remains the default baseline — every repo in the org +# gets a ProjectCard and an auto-generated project page from its +# README. This config adds precision where needed: +# +# skip_org_sync: true → suppress the auto-generated project page +# for this repo; only the config-declared +# files are synced. The ProjectCard is still +# built from API metadata. +# +# skip_org_sync: false → (default) the org scan runs normally AND +# the config-declared files are synced as +# additional content. +# ───────────────────────────────────────────────────────────────────── + +defaults: + branch: main + +discovery: + ignore_repos: + - .github + - website + - community + - org-infra + - complytime-demos + - complytime-policies + - complytime-collector-distro + scan_paths: + - docs + ignore_files: + - CHANGELOG.md + - CODE_OF_CONDUCT.md + +sources: []