Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .agent/rules/auto-generated-files.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ globs:
- "bundle/internal/schema/annotations_openapi.yml"
- "bundle/internal/validation/generated/*.go"
- "bundle/schema/jsonschema.json"
- "bundle/schema/jsonschema_for_docs.json"
- "python/databricks/bundles/version.py"
- "python/databricks/bundles/*/__init__.py"
- "python/databricks/bundles/*/_models/*.py"
Expand All @@ -40,7 +39,6 @@ paths:
- "bundle/internal/schema/annotations_openapi.yml"
- "bundle/internal/validation/generated/*.go"
- "bundle/schema/jsonschema.json"
- "bundle/schema/jsonschema_for_docs.json"
- "python/databricks/bundles/version.py"
- "python/databricks/bundles/*/__init__.py"
- "python/databricks/bundles/*/_models/*.py"
Expand Down
65 changes: 28 additions & 37 deletions .github/workflows/update-schema-docs.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
name: update-schema-docs

# Regenerate bundle/schema/jsonschema_for_docs.json after every release and
# publish it to the `docgen` branch.
# Regenerate the bundle docs JSON schema after every release and publish it to
# the `docgen` branch. The docs schema is NOT kept on main — `docgen` is its
# only home (downstream docs tooling reads it from there).
#
# bundle/internal/schema/since_version.go derives `x-since-version` annotations
# from the list of `v*` git tags that exist when the schema is generated. The
# `docgen` branch is therefore stale by one release as soon as the next tag is
# pushed; this workflow keeps it current.
# from the list of `v*` git tags that exist when the schema is generated, plus an
# append-only since-versions state (bundle/schema/since_versions.json) that also
# lives on `docgen`. The generator regenerates the schema straight into the
# docgen worktree, reads and refreshes that state (recorded versions never
# change), and this workflow commits both back to `docgen` — so annotations stay
# stable across schema refactors without keeping anything on main.

on:
push:
Expand Down Expand Up @@ -68,54 +72,41 @@ jobs:
echo "tag=$tag" >> "$GITHUB_OUTPUT"
echo "Publishing for tag $tag"

- name: Regenerate jsonschema_for_docs.json
run: go tool -modfile=tools/task/go.mod task --force generate-schema-docs

# Fail loudly if regeneration touches anything other than the docs schema.
# Anything else (annotations.yml, untracked files, ...) is a bug in the
# generator, not something we want to silently publish.
- name: Assert only jsonschema_for_docs.json changed on main
run: |
changed=$(git status --porcelain)
expected=" M bundle/schema/jsonschema_for_docs.json"
if [ -z "$changed" ]; then
echo "Regeneration produced no diff against main."
exit 0
fi
if [ "$changed" != "$expected" ]; then
echo "Expected only bundle/schema/jsonschema_for_docs.json to be modified."
echo "Actual git status --porcelain:"
echo "$changed"
exit 1
fi

- name: Capture regenerated file
run: |
mkdir -p "$RUNNER_TEMP/regen"
cp bundle/schema/jsonschema_for_docs.json "$RUNNER_TEMP/regen/jsonschema_for_docs.json"

# Check out docgen first: it holds the append-only since-versions state
# (bundle/schema/since_versions.json) that the generator reads and refreshes.
- name: Check out docgen worktree
run: |
git fetch origin docgen
git worktree add "$RUNNER_TEMP/docgen" origin/docgen

- name: Stage regenerated file on docgen
working-directory: ${{ runner.temp }}/docgen
# Generate the docs schema straight into the docgen worktree. The docs
# schema is no longer kept on main — docgen is its only home, alongside the
# append-only since-versions state.
- name: Regenerate docs schema into the docgen worktree
env:
# since_version.go reads this append-only state and refreshes it
# (recorded versions never change). Both the schema and the state live
# on docgen, not in the main source tree.
DATABRICKS_SINCE_VERSIONS_FILE: ${{ runner.temp }}/docgen/bundle/schema/since_versions.json
run: |
mkdir -p bundle/schema
cp "$RUNNER_TEMP/regen/jsonschema_for_docs.json" bundle/schema/jsonschema_for_docs.json
git add bundle/schema/jsonschema_for_docs.json
# since_version.go reads `git tag --list 'v*'` to discover newly added
# fields; make sure tags are present (no-op when already fetched).
git fetch origin 'refs/tags/v*:refs/tags/v*' || true
mkdir -p "$RUNNER_TEMP/docgen/bundle/schema"
go run ./bundle/internal/schema ./bundle/internal/schema \
"$RUNNER_TEMP/docgen/bundle/schema/jsonschema_for_docs.json" --docs

- name: Commit and push to docgen
working-directory: ${{ runner.temp }}/docgen
env:
TAG: ${{ steps.tag.outputs.tag }}
run: |-
git add bundle/schema/jsonschema_for_docs.json bundle/schema/since_versions.json
if git diff --cached --quiet; then
echo "docgen already up to date for ${TAG}; nothing to commit."
exit 0
fi
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git commit -m "Update jsonschema_for_docs.json for ${TAG}"
git commit -m "Update jsonschema_for_docs.json and since_versions.json for ${TAG}"
git push origin HEAD:docgen
1 change: 0 additions & 1 deletion .wsignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
.codegen/_openapi_sha
.release_metadata.json
bundle/schema/jsonschema.json
bundle/schema/jsonschema_for_docs.json
python/docs/images/databricks-logo.svg
**/*.dist-info/METADATA
**/*.dist-info/WHEEL
Expand Down
16 changes: 0 additions & 16 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ tasks:
# committed). Run `./task generate` explicitly when codegen inputs change.
- task: generate-refschema
- task: generate-schema
- task: generate-schema-docs
- task: generate-validation
- task: generate-docs
- task: generate-direct
Expand Down Expand Up @@ -717,7 +716,6 @@ tasks:
# generate-direct-apitypes and generate-direct-resources below.
- task: generate-refschema
- task: generate-schema
- task: generate-schema-docs
- task: generate-validation
- task: generate-docs
- task: generate-direct
Expand Down Expand Up @@ -811,20 +809,6 @@ tasks:
cmds:
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json"

generate-schema-docs:
desc: Generate bundle JSON schema for documentation
sources: *SCHEMA_SOURCES
generates:
- bundle/schema/jsonschema_for_docs.json
- bundle/internal/schema/annotations.yml
cmds:
# since_version.go reads `git tag --list 'v*'` to compute sinceVersion
# annotations. Without tags (e.g. shallow clone), those annotations are
# silently dropped from the output. Restore the fetch that lived in the
# old tools/post-generate.sh.
- git fetch origin 'refs/tags/v*:refs/tags/v*'
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json --docs"

generate-docs:
desc: Generate bundle documentation
sources:
Expand Down
4 changes: 3 additions & 1 deletion bundle/internal/schema/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,9 @@ func generateSchema(workdir, outputFile string, docsMode bool) {
log.Fatal(err)
}

// In docs mode, add sinceVersion annotations by analyzing git history.
// In docs mode, add sinceVersion annotations. When DATABRICKS_SINCE_VERSIONS_FILE
// is set (by the update-schema-docs workflow), versions computed from git history
// are merged into the persisted, append-only state on docgen, with stored entries
// taking precedence; otherwise they are computed from git history alone.
if docsMode {
sinceVersions, err := computeSinceVersions()
if err != nil {
Expand Down
131 changes: 128 additions & 3 deletions bundle/internal/schema/since_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,147 @@ package main

import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"maps"
"os"
"os/exec"
"strconv"
"strings"

"github.com/databricks/cli/libs/jsonschema"
)

// sinceVersionsStateEnv names the env var that points at the persisted,
// append-only since-versions state file. It is set by the update-schema-docs
// workflow to a path inside a checkout of the `docgen` branch, so the state is
// stored and updated there (never in the main source tree). When unset or empty
// (local `task generate`, regular CI) since versions are computed from git
// history only and nothing is persisted.
const sinceVersionsStateEnv = "DATABRICKS_SINCE_VERSIONS_FILE"

// sinceVersionAliases maps a current "typePath.fieldName" key to the key it was
// previously known by. When a Go type is renamed, moved, or retyped (e.g. the
// shared Permission struct being split into per-resource typed structs), the new
// key would otherwise look brand new and be stamped with the current release
// version. Listing the rename here lets the new key inherit the original key's
// since version, keeping it stable across refactors.
//
// An alias only takes effect while the new key is not yet recorded in the
// stored state; once recorded, the stored version is used as-is. The map is
// empty until a rename needs to be registered.
//
// Example entry:
//
//	"github.com/databricks/cli/bundle/config/resources.AppPermission.user_name":
//	    "github.com/databricks/cli/bundle/config/resources.Permission.user_name",
var sinceVersionAliases = map[string]string{}

// embeddedSchemaVersion is the version at which bundle/schema/jsonschema.json
// was added to the repo, held as three integers ({0, 229, 0} corresponds to
// v0.229.0 — presumably major, minor, patch).
var embeddedSchemaVersion = [3]int{0, 229, 0}

// computeSinceVersions builds the "typePath.fieldName" -> version map (for
// example "v0.229.0") that is used to annotate the schema.
//
// Git history is always consulted first via computeSinceVersionsFromHistory.
// What happens next depends on the DATABRICKS_SINCE_VERSIONS_FILE environment
// variable (see sinceVersionsStateEnv):
//
//   - Unset or empty: the history-derived map and its error are returned
//     unchanged and nothing is written to disk.
//   - Set: the file it names is treated as append-only, authoritative state.
//     A history failure is reduced to a warning on stdout and handled as "no
//     computed versions", so annotation can still proceed from stored state.
//     The stored map is loaded (a missing file counts as empty, which seeds the
//     state on the first run), merged with the computed map by
//     mergeSinceVersions (stored entries win; renames are resolved through
//     sinceVersionAliases), and the merged result is written back to the same
//     file before being returned, so newly discovered fields become frozen too.
//
// Errors from loading the state are returned as-is; errors from saving it are
// wrapped with the state path.
func computeSinceVersions() (map[string]string, error) {
	fromHistory, historyErr := computeSinceVersionsFromHistory()

	stateFile := os.Getenv(sinceVersionsStateEnv)
	if stateFile == "" {
		// No persisted state configured: hand back exactly what history gave
		// us, including any error, so the caller can decide to skip annotation.
		return fromHistory, historyErr
	}

	if historyErr != nil {
		// Missing git history/tags is not fatal here: the stored state alone is
		// still enough to annotate previously recorded fields.
		fmt.Printf("Warning: could not compute since versions from git history: %v\n", historyErr)
		fromHistory = map[string]string{}
	}

	persisted, loadErr := loadStoredSinceVersions(stateFile)
	if loadErr != nil {
		return nil, loadErr
	}

	combined := mergeSinceVersions(fromHistory, persisted, sinceVersionAliases)

	// Persist before returning so that anything first seen in this run is
	// recorded and stays fixed in later runs.
	if saveErr := saveStoredSinceVersions(stateFile, combined); saveErr != nil {
		return nil, fmt.Errorf("writing %s: %w", stateFile, saveErr)
	}
	return combined, nil
}

// mergeSinceVersions layers three sources of "typePath.fieldName" -> version
// data into a newly allocated map. None of the input maps is modified, and nil
// inputs are accepted.
//
// Precedence, lowest to highest:
//
//  1. computed: first-observed versions freshly derived from git history.
//  2. aliases: for every newKey -> oldKey pair whose newKey is not present in
//     stored, newKey takes oldKey's version, looked up in stored first and in
//     computed second. If oldKey is found in neither, newKey keeps whatever
//     computed provided (possibly nothing).
//  3. stored: previously recorded versions. These always win, which is what
//     makes the state append-only and keeps versions stable across refactors.
func mergeSinceVersions(computed, stored, aliases map[string]string) map[string]string {
	merged := make(map[string]string, len(computed)+len(stored))
	maps.Copy(merged, computed)

	for alias, previous := range aliases {
		if _, recorded := stored[alias]; recorded {
			// Already frozen under its new name; the final overlay applies it.
			continue
		}
		version, found := stored[previous]
		if !found {
			version, found = computed[previous]
		}
		if found {
			merged[alias] = version
		}
	}

	// Overlay stored last: a recorded version is the canonical answer.
	maps.Copy(merged, stored)
	return merged
}

// loadStoredSinceVersions reads the persisted since-version map from the JSON
// file at path.
//
// A file that does not exist is not an error: an empty map is returned so the
// generator works on a checkout that has not recorded any versions yet. Any
// other read failure, and any JSON decoding failure, is returned wrapped with
// the path ("reading <path>: ..." / "parsing <path>: ...") together with a nil
// map.
func loadStoredSinceVersions(path string) (map[string]string, error) {
	raw, readErr := os.ReadFile(path)
	switch {
	case errors.Is(readErr, fs.ErrNotExist):
		return map[string]string{}, nil
	case readErr != nil:
		return nil, fmt.Errorf("reading %s: %w", path, readErr)
	}

	versions := make(map[string]string)
	if parseErr := json.Unmarshal(raw, &versions); parseErr != nil {
		return nil, fmt.Errorf("parsing %s: %w", path, parseErr)
	}
	return versions, nil
}

// saveStoredSinceVersions serializes versions as indented JSON and writes it to
// path with mode 0644, creating or truncating the file.
//
// The output is deterministic so the committed file stays diff-stable:
// json.MarshalIndent emits map keys in sorted order, and a single trailing
// newline is appended. The marshalling or write error, if any, is returned
// unwrapped.
func saveStoredSinceVersions(path string, versions map[string]string) error {
	encoded, err := json.MarshalIndent(versions, "", " ")
	if err == nil {
		err = os.WriteFile(path, append(encoded, '\n'), 0o644)
	}
	return err
}

// computeSinceVersionsFromHistory computes when each field was first introduced
// by analyzing git history. It returns a map from "typePath.fieldName" to the
// version string (e.g., "v0.229.0").
func computeSinceVersionsFromHistory() (map[string]string, error) {
versions, err := getVersionTags()
if err != nil {
return nil, fmt.Errorf("getting version tags: %w", err)
Expand Down
Loading