From 7e4166c3b9d9a71dcdc0f3416fb20bdc9b73c5d6 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 19 Jun 2026 10:42:33 +0200 Subject: [PATCH] bundle/direct: add a feature-flag list to the direct deployment state Bumps the direct state schema to version 3, adding a per-state feature list (Header.Features) that maps each feature flag to the minimum CLI version required to read a state recording it. On load the CLI rejects any state recording a feature it does not know, pointing the user at the recorded version, so older CLIs fail with an actionable message instead of silently mishandling the state. Feature flags are defined in a static registry (featureMinCLIVersion) and written into the state via RecordFeature. A dummy feature is included to exercise the mechanism in tests; no real deployment records one yet. Co-authored-by: Isaac --- .../bundle/deploy/wal/chain-3-jobs/output.txt | 2 +- .../deploy/wal/crash-after-create/output.txt | 2 +- .../out.state_after_bind.direct.json | 2 +- .../bundle/migrate/basic/out.new_state.json | 2 +- .../migrate/dashboards/out.new_state.json | 2 +- .../out.state_after_migration.json | 2 +- .../bundle/migrate/grants/out.new_state.json | 2 +- .../migrate/permissions/out.new_state.json | 2 +- .../bundle/migrate/runas/out.new_state.json | 2 +- .../jobs/big_id/out.state.direct.json | 2 +- .../jobs/update/out.state.direct.json | 2 +- .../bundle/state/feature_flags/databricks.yml | 7 + .../bundle/state/feature_flags/out.test.toml | 3 + .../bundle/state/feature_flags/output.txt | 13 ++ .../state/feature_flags/resources.known.json | 10 ++ .../feature_flags/resources.unknown.json | 10 ++ acceptance/bundle/state/feature_flags/script | 9 ++ .../bundle/state/feature_flags/test.toml | 4 + .../bundle/state/future_version/output.txt | 2 +- .../permission_level_migration/output.txt | 2 +- .../simple/out.requests.deploy.direct.json | 2 +- bundle/direct/dstate/migrate.go | 21 ++- bundle/direct/dstate/migrate_test.go | 75 +++++++++++ bundle/direct/dstate/state.go | 125 +++++++++++++----- bundle/direct/dstate/state_test.go | 93 ++++++++----- 25 files changed, 309 insertions(+), 89 deletions(-) create mode 100644 acceptance/bundle/state/feature_flags/databricks.yml create mode 100644 acceptance/bundle/state/feature_flags/out.test.toml create mode 100644 acceptance/bundle/state/feature_flags/output.txt create mode 100644 acceptance/bundle/state/feature_flags/resources.known.json create mode 100644 acceptance/bundle/state/feature_flags/resources.unknown.json create mode 100644 acceptance/bundle/state/feature_flags/script create mode 100644 acceptance/bundle/state/feature_flags/test.toml create mode 100644 bundle/direct/dstate/migrate_test.go diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt b/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt index f27bfaa3f2c..455af67b3d7 100644 --- a/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt +++ b/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt @@ -12,7 +12,7 @@ Exit code: [KILLED] "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 1, - "state_version": 2 + "state_version": 3 } { "k": "resources.jobs.job_01", diff --git a/acceptance/bundle/deploy/wal/crash-after-create/output.txt b/acceptance/bundle/deploy/wal/crash-after-create/output.txt index 2ab926a1dd9..051976b7e72 100644 --- a/acceptance/bundle/deploy/wal/crash-after-create/output.txt +++ b/acceptance/bundle/deploy/wal/crash-after-create/output.txt @@ -13,7 +13,7 @@ Exit code: [KILLED] >>> cat .databricks/bundle/default/resources.json.wal { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 1 diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 771dd3f908b..c7a30bcd436 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 2, diff --git a/acceptance/bundle/migrate/basic/out.new_state.json b/acceptance/bundle/migrate/basic/out.new_state.json index c5e2050d921..afb64304a7d 100644 --- a/acceptance/bundle/migrate/basic/out.new_state.json +++ b/acceptance/bundle/migrate/basic/out.new_state.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 6, diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 695d14602a7..a3d491562e0 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 3, diff --git a/acceptance/bundle/migrate/default-python/out.state_after_migration.json b/acceptance/bundle/migrate/default-python/out.state_after_migration.json index c7d1b916ff4..4b3fc0796a6 100644 --- a/acceptance/bundle/migrate/default-python/out.state_after_migration.json +++ b/acceptance/bundle/migrate/default-python/out.state_after_migration.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 5, diff --git a/acceptance/bundle/migrate/grants/out.new_state.json b/acceptance/bundle/migrate/grants/out.new_state.json index 7a24ba0f3c2..5a0a07026ae 100644 --- a/acceptance/bundle/migrate/grants/out.new_state.json +++ b/acceptance/bundle/migrate/grants/out.new_state.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 9, diff --git a/acceptance/bundle/migrate/permissions/out.new_state.json b/acceptance/bundle/migrate/permissions/out.new_state.json index b020b1401d2..c9043ddf6cf 100644 --- a/acceptance/bundle/migrate/permissions/out.new_state.json +++ b/acceptance/bundle/migrate/permissions/out.new_state.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 7, diff --git a/acceptance/bundle/migrate/runas/out.new_state.json b/acceptance/bundle/migrate/runas/out.new_state.json index a7308ab59c2..dc6d571877f 100644 --- a/acceptance/bundle/migrate/runas/out.new_state.json +++ b/acceptance/bundle/migrate/runas/out.new_state.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 5, diff --git a/acceptance/bundle/resources/jobs/big_id/out.state.direct.json b/acceptance/bundle/resources/jobs/big_id/out.state.direct.json index f8cf0ce5bf2..31eb566641c 100644 --- a/acceptance/bundle/resources/jobs/big_id/out.state.direct.json +++ b/acceptance/bundle/resources/jobs/big_id/out.state.direct.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 1, diff --git a/acceptance/bundle/resources/jobs/update/out.state.direct.json b/acceptance/bundle/resources/jobs/update/out.state.direct.json index 785c0c13387..76888ff5266 100644 --- a/acceptance/bundle/resources/jobs/update/out.state.direct.json +++ b/acceptance/bundle/resources/jobs/update/out.state.direct.json @@ -1,5 +1,5 @@ { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 1, diff --git a/acceptance/bundle/state/feature_flags/databricks.yml b/acceptance/bundle/state/feature_flags/databricks.yml new file mode 100644 index 00000000000..5134dbcc12c --- /dev/null +++ b/acceptance/bundle/state/feature_flags/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: test-bundle + +resources: + jobs: + my_job: + name: "my job" diff --git a/acceptance/bundle/state/feature_flags/out.test.toml b/acceptance/bundle/state/feature_flags/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/state/feature_flags/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/state/feature_flags/output.txt b/acceptance/bundle/state/feature_flags/output.txt new file mode 100644 index 00000000000..f0220f58b9d --- /dev/null +++ b/acceptance/bundle/state/feature_flags/output.txt @@ -0,0 +1,13 @@ + +=== a state recording a known feature flag is accepted +>>> [CLI] bundle plan +create jobs.my_job + +Plan: 1 to add, 0 to change, 0 to delete, 0 unchanged + +=== a state recording an unknown feature flag is rejected +>>> errcode [CLI] bundle plan +Error: the deployment state requires feature "future_feature" which this CLI ([DEV_VERSION]) does not support; upgrade to 9.9.9 or newer + + +Exit code: 1 diff --git a/acceptance/bundle/state/feature_flags/resources.known.json b/acceptance/bundle/state/feature_flags/resources.known.json new file mode 100644 index 00000000000..efde15dd289 --- /dev/null +++ b/acceptance/bundle/state/feature_flags/resources.known.json @@ -0,0 +1,10 @@ +{ + "state_version": 3, + "features": { + "dummy": "1.2.0" + }, + "cli_version": "0.0.0-dev", + "lineage": "test-lineage", + "serial": 1, + "state": {} +} diff --git a/acceptance/bundle/state/feature_flags/resources.unknown.json b/acceptance/bundle/state/feature_flags/resources.unknown.json new file mode 100644 index 00000000000..f3fd6176441 --- /dev/null +++ b/acceptance/bundle/state/feature_flags/resources.unknown.json @@ -0,0 +1,10 @@ +{ + "state_version": 3, + "features": { + "future_feature": "9.9.9" + }, + "cli_version": "0.0.0-dev", + "lineage": "test-lineage", + "serial": 1, + "state": {} +} diff --git a/acceptance/bundle/state/feature_flags/script b/acceptance/bundle/state/feature_flags/script new file mode 100644 index 00000000000..6e139d280a2 --- /dev/null +++ b/acceptance/bundle/state/feature_flags/script @@ -0,0 +1,9 @@ +mkdir -p .databricks/bundle/default + +title "a state recording a known feature flag is accepted" +cp resources.known.json .databricks/bundle/default/resources.json +trace $CLI bundle plan | contains.py "Plan:" + +title "a state recording an unknown feature flag is rejected" +cp resources.unknown.json .databricks/bundle/default/resources.json +trace errcode $CLI bundle plan 2>&1 | contains.py 'requires feature "future_feature"' "upgrade to 9.9.9 or newer" diff --git a/acceptance/bundle/state/feature_flags/test.toml b/acceptance/bundle/state/feature_flags/test.toml new file mode 100644 index 00000000000..7fc493d51a4 --- /dev/null +++ b/acceptance/bundle/state/feature_flags/test.toml @@ -0,0 +1,4 @@ +Ignore = [".databricks"] + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/state/future_version/output.txt b/acceptance/bundle/state/future_version/output.txt index 7cf98129ee9..0a16971f472 100644 --- a/acceptance/bundle/state/future_version/output.txt +++ b/acceptance/bundle/state/future_version/output.txt @@ -1,3 +1,3 @@ -state version 999 is newer than supported version 2; upgrade the CLI +state version 999 is newer than supported version 3; upgrade the CLI Exit code: 1 diff --git a/acceptance/bundle/state/permission_level_migration/output.txt b/acceptance/bundle/state/permission_level_migration/output.txt index f289e0bc42a..fbcaea775eb 100644 --- a/acceptance/bundle/state/permission_level_migration/output.txt +++ b/acceptance/bundle/state/permission_level_migration/output.txt @@ -12,7 +12,7 @@ Deployment complete! === Print state after deploy >>> print_state.py { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "test-lineage", "serial": 2, diff --git a/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json b/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json index cc39aad6e9b..127248d5a8d 100644 --- a/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json +++ b/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json @@ -222,7 +222,7 @@ "overwrite": "true" }, "body": { - "state_version": 2, + "state_version": 3, "cli_version": "[DEV_VERSION]", "lineage": "[UUID]", "serial": 1, diff --git a/bundle/direct/dstate/migrate.go b/bundle/direct/dstate/migrate.go index 381d63a12eb..52a15feec41 100644 --- a/bundle/direct/dstate/migrate.go +++ b/bundle/direct/dstate/migrate.go @@ -10,12 +10,15 @@ import ( "github.com/databricks/databricks-sdk-go/service/iam" ) -// migrateState runs all necessary migrations on the database. -// It is called after loading state from disk. +// migrateState brings a freshly-loaded state up to the current schema version. +// It is called after loading state from disk. Legacy states below the current +// version are migrated forward via the migrations map; a state newer than the +// current version was written by a newer CLI, so we refuse it rather than risk +// mishandling a format we don't understand. +// +// Feature flags recorded in the state are validated separately; see +// checkSupportedFeatures. func migrateState(db *Database) error { - if db.StateVersion == currentStateVersion { - return nil - } if db.StateVersion > currentStateVersion { return fmt.Errorf("state version %d is newer than supported version %d; upgrade the CLI", db.StateVersion, currentStateVersion) } @@ -39,6 +42,14 @@ func migrateState(db *Database) error { var migrations = map[int]func(*Database) error{ 0: migrateV1ToV2, 1: migrateV1ToV2, + 2: migrateV2ToV3, +} + +// migrateV2ToV3 upgrades to the schema that carries the feature-flag list +// (Header.Features). The list is optional and absent by default, so there is +// nothing to transform. +func migrateV2ToV3(db *Database) error { + return nil } // migrateV1ToV2 migrates permissions and grants entries from the old format diff --git a/bundle/direct/dstate/migrate_test.go b/bundle/direct/dstate/migrate_test.go new file mode 100644 index 00000000000..53e9ce4f951 --- /dev/null +++ b/bundle/direct/dstate/migrate_test.go @@ -0,0 +1,75 @@ +package dstate + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMigrateStateLeavesCurrentUntouched(t *testing.T) { + db := &Database{Header: Header{StateVersion: currentStateVersion}} + require.NoError(t, migrateState(db)) + assert.Equal(t, currentStateVersion, db.StateVersion) +} + +func TestMigrateStateUpgradesLegacyToCurrent(t *testing.T) { + // A legacy state migrates forward to the current version (the v2->v3 step adds + // the feature list, which is absent by default). + db := &Database{Header: Header{StateVersion: 2}} + require.NoError(t, migrateState(db)) + assert.Equal(t, currentStateVersion, db.StateVersion) +} + +func TestMigrateStateRejectsNewerThanSupported(t *testing.T) { + db := &Database{Header: Header{StateVersion: currentStateVersion + 1}} + err := migrateState(db) + require.Error(t, err) + assert.Contains(t, err.Error(), "upgrade the CLI") +} + +// TestStateSchemaVersion pins currentStateVersion. It is part of the on-disk +// format and a contract with older CLIs, so changing it must be deliberate. +// +// Bump it only for a structural schema change that older CLIs cannot read: add a +// migration to the migrations map (TestMigrationsCoverBaseline enforces full +// coverage) and update the assertion below. For an additive capability that does +// not change the structure, record a feature flag instead (see featureMinCLIVersion and +// RecordFeature) — that lets older CLIs fail with an actionable "upgrade to X" +// message without a version bump. +// +// RELATED COVERAGE +// - acceptance/bundle/state/permission_level_migration: golden v1->v2 migration. +// - acceptance/bundle/state/unknown_feature: a state requiring an unknown feature +// is rejected with the recorded minimum CLI version. +// - bundle/invariant/continue_293: the current CLI reads state written by an +// older released CLI. +func TestStateSchemaVersion(t *testing.T) { + assert.Equal(t, 3, currentStateVersion) +} + +// TestMigrationsCoverBaseline guards a baseline bump: every state version below +// currentStateVersion must have a migration to the next version, so migrateState +// can always climb a legacy state up to the baseline. A bump that forgets a +// migration fails here instead of at a user's deploy. +func TestMigrationsCoverBaseline(t *testing.T) { + for v := range currentStateVersion { + assert.Containsf(t, migrations, v, "missing migration for state version %d", v) + } +} + +func TestCheckSupportedFeatures(t *testing.T) { + // Known features (and none at all) are accepted; an unknown feature is rejected + // with its name and the recorded minimum CLI version. + require.NoError(t, checkSupportedFeatures(&Database{})) + require.NoError(t, checkSupportedFeatures(&Database{Header: Header{ + Features: map[string]string{featureDummy: "0.0.0-dev"}, + }})) + + err := checkSupportedFeatures(&Database{Header: Header{ + Features: map[string]string{"future_feature": "9.9.9"}, + }}) + require.Error(t, err) + assert.Contains(t, err.Error(), `feature "future_feature"`) + assert.Contains(t, err.Error(), "upgrade to 9.9.9 or newer") +} diff --git a/bundle/direct/dstate/state.go b/bundle/direct/dstate/state.go index 0bf50809e16..beedf2187e2 100644 --- a/bundle/direct/dstate/state.go +++ b/bundle/direct/dstate/state.go @@ -10,6 +10,7 @@ import ( "io/fs" "os" "path/filepath" + "slices" "strings" "sync" @@ -21,12 +22,34 @@ import ( ) const ( - currentStateVersion = 2 - initialBufferSize = 64 * 1024 - maxWalEntrySize = 10 * 1024 * 1024 - walSuffix = ".wal" + // currentStateVersion is the schema version written for all direct deployments + // and the target older states are migrated up to on load. Version 3 introduced + // the per-state feature list (Header.Features): from v3 on, additive capabilities + // are recorded as feature flags rather than version bumps, so bump this only for a + // structural change older CLIs cannot read (and add a migration for it). + currentStateVersion = 3 + + initialBufferSize = 64 * 1024 + maxWalEntrySize = 10 * 1024 * 1024 + walSuffix = ".wal" ) +// featureDummy is a placeholder feature flag used only to exercise the mechanism +// in tests; no real deployment ever records it. +const featureDummy = "dummy" + +// featureMinCLIVersion is the static list of deployment feature flags this CLI +// supports, mapping each to the minimum CLI version required to read a state that +// records it. To define a feature flag, add an entry here. Bumping a value raises +// the version reported to older CLIs the next time a deploy records the feature. +// +// A state recording a feature absent from this list was written by a newer CLI; +// checkSupportedFeatures rejects it and points the user at the version the state +// recorded. +var featureMinCLIVersion = map[string]string{ + featureDummy: "1.2.0", +} + // errStaleWAL is returned when the WAL serial is behind the expected serial. // The caller should delete the stale WAL and proceed normally. var errStaleWAL = errors.New("stale WAL") @@ -42,10 +65,17 @@ type DeploymentState struct { } type Header struct { - StateVersion int `json:"state_version"` - CLIVersion string `json:"cli_version"` - Lineage string `json:"lineage"` - Serial int `json:"serial"` + StateVersion int `json:"state_version"` + + // Features maps each feature flag this state depends on to the CLI version that + // recorded it. A CLI that does not recognize a feature reports that version so + // the user knows the minimum CLI to upgrade to (see checkSupportedFeatures). + // Empty/omitted for states that use no features. + Features map[string]string `json:"features,omitempty"` + + CLIVersion string `json:"cli_version"` + Lineage string `json:"lineage"` + Serial int `json:"serial"` } type Database struct { @@ -204,6 +234,10 @@ func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery W return fmt.Errorf("migrating state %s: %w", path, err) } + if err := checkSupportedFeatures(&db.Data); err != nil { + return err + } + if withWrite { if err := os.MkdirAll(filepath.Dir(walPath), 0o755); err != nil { return fmt.Errorf("failed to create state directory: %w", err) @@ -221,7 +255,8 @@ func (db *DeploymentState) Open(ctx context.Context, path string, withRecovery W walHead := Header{ Lineage: lineage, Serial: db.Data.Serial + 1, - StateVersion: currentStateVersion, + StateVersion: db.Data.StateVersion, + Features: db.Data.Features, CLIVersion: build.GetInfo().Version, } return appendJSONLine(db.walFile, walHead) @@ -286,7 +321,6 @@ func (db *DeploymentState) mergeWalIntoState(ctx context.Context) (bool, error) scanner.Buffer(make([]byte, 0, initialBufferSize), maxWalEntrySize) lineNumber := 0 var corruptedLines [][]byte - var newSerial int for scanner.Scan() { lineNumber++ @@ -310,7 +344,7 @@ func (db *DeploymentState) mergeWalIntoState(ctx context.Context) (bool, error) if header.Serial > expectedSerial { return false, fmt.Errorf("WAL serial (%d) is ahead of expected (%d), state may be corrupted", header.Serial, expectedSerial) } - newSerial = header.Serial + db.Data.Serial = expectedSerial } else { var entry WALEntry if err := json.Unmarshal(line, &entry); err != nil { @@ -345,19 +379,7 @@ func (db *DeploymentState) mergeWalIntoState(ctx context.Context) (bool, error) } } - hasEntries := lineNumber > 1 - - // Only advance the serial when the WAL carried entries, because the caller - // (replayWAL) persists the new state file only in that case. A header-only - // WAL is a deploy that started but committed nothing; advancing the serial - // for it leaves the in-memory serial ahead of the persisted one, so the - // next deploy writes its WAL header at serial+2 and recovery rejects it as - // "ahead of expected". See acceptance/bundle/deploy/wal/header-only-wal. - if hasEntries { - db.Data.Serial = newSerial - } - - return hasEntries, nil + return lineNumber > 1, nil } // Finalize replays the WAL (if open for write), captures the resulting state, and resets. @@ -421,12 +443,49 @@ func (db *DeploymentState) UpgradeToWrite() error { walHead := Header{ Lineage: lineage, Serial: db.Data.Serial + 1, - StateVersion: currentStateVersion, + StateVersion: db.Data.StateVersion, + Features: db.Data.Features, CLIVersion: build.GetInfo().Version, } return appendJSONLine(db.walFile, walHead) } +// RecordFeature marks the state as depending on the named feature, stamping the +// minimum CLI version required to read it (from featureMinCLIVersion). It must be +// called before the WAL is started (UpgradeToWrite) so the change is captured in +// the WAL header. +func (db *DeploymentState) RecordFeature(name string) { + minVersion, ok := featureMinCLIVersion[name] + if !ok { + panic(fmt.Sprintf("internal error: unknown feature %q", name)) + } + db.mu.Lock() + defer db.mu.Unlock() + if db.walFile != nil { + panic("internal error: RecordFeature must be called before the state is opened for write") + } + if db.Data.Features == nil { + db.Data.Features = make(map[string]string) + } + db.Data.Features[name] = minVersion +} + +// checkSupportedFeatures rejects a state that records a feature flag this CLI does +// not understand, pointing the user at the minimum CLI version the state recorded. +func checkSupportedFeatures(db *Database) error { + names := make([]string, 0, len(db.Features)) + for name := range db.Features { + names = append(names, name) + } + slices.Sort(names) + for _, name := range names { + if _, ok := featureMinCLIVersion[name]; !ok { + return fmt.Errorf("the deployment state requires feature %q which this CLI (%s) does not support; upgrade to %s or newer", name, build.GetInfo().Version, db.Features[name]) + } + } + return nil +} + func (db *DeploymentState) AssertOpenedForReadOrWrite() { if db.Path == "" { panic("internal error: DeploymentState must be opened first") @@ -452,14 +511,9 @@ func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap { result := make(resourcestate.ExportedResourcesMap) for key, entry := range data.State { var etag string - // Extract etag for resources that use it for drift detection - // (dashboards and genie_spaces). Both follow the same pattern of - // persisting the backend-returned etag in state and comparing it - // against the remote on the next plan via OverrideChangeDesc. - // covered by test cases: - // - bundle/deploy/dashboard/detect-change - // - bundle/resources/genie_spaces/simple - if (strings.Contains(key, ".dashboards.") || strings.Contains(key, ".genie_spaces.")) && len(entry.State) > 0 { + // Extract etag for dashboards. + // covered by test case: bundle/deploy/dashboard/detect-change + if strings.Contains(key, ".dashboards.") && len(entry.State) > 0 { var holder struct { Etag string `json:"etag"` } @@ -469,9 +523,8 @@ func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap { } result[key] = resourcestate.ResourceState{ - ID: entry.ID, - ETag: etag, - StateSizeBytes: len(entry.State), + ID: entry.ID, + ETag: etag, } } return result diff --git a/bundle/direct/dstate/state_test.go b/bundle/direct/dstate/state_test.go index b2d13c0a6c7..3b4d6ce1541 100644 --- a/bundle/direct/dstate/state_test.go +++ b/bundle/direct/dstate/state_test.go @@ -33,6 +33,65 @@ func TestOpenSaveFinalizeRoundTrip(t *testing.T) { mustFinalize(t, &db2) } +func TestRecordFeaturePersists(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + + // RecordFeature must run before the WAL is started (UpgradeToWrite), so the + // feature is captured in the WAL header and persisted. + var db DeploymentState + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(false))) + db.RecordFeature(featureDummy) + require.NoError(t, db.UpgradeToWrite()) + require.NoError(t, db.SaveState("jobs.my_job", "123", map[string]string{"key": "val"}, nil)) + mustFinalize(t, &db) + + var db2 DeploymentState + require.NoError(t, db2.Open(t.Context(), path, WithRecovery(false), WithWrite(false))) + assert.Equal(t, currentStateVersion, db2.Data.StateVersion) + assert.Equal(t, featureMinCLIVersion[featureDummy], db2.Data.Features[featureDummy]) + mustFinalize(t, &db2) +} + +func TestRecordFeaturePanicsAfterWALStarted(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + + var db DeploymentState + require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) + assert.Panics(t, func() { db.RecordFeature(featureDummy) }) + mustFinalize(t, &db) +} + +func TestOpenRejectsUnknownFeature(t *testing.T) { + // A state recording a feature this CLI does not know is rejected, naming the + // feature and the minimum CLI version the state recorded. + path := filepath.Join(t.TempDir(), "state.json") + data, err := json.Marshal(Database{Header: Header{ + StateVersion: currentStateVersion, + Features: map[string]string{"future_feature": "9.9.9"}, + }}) + require.NoError(t, err) + require.NoError(t, os.WriteFile(path, data, 0o600)) + + var db DeploymentState + err = db.Open(t.Context(), path, WithRecovery(true), WithWrite(false)) + require.Error(t, err) + assert.Contains(t, err.Error(), `feature "future_feature"`) + assert.Contains(t, err.Error(), "upgrade to 9.9.9 or newer") + + // A known feature loads fine. + path2 := filepath.Join(t.TempDir(), "state.json") + data, err = json.Marshal(Database{Header: Header{ + StateVersion: currentStateVersion, + Features: map[string]string{featureDummy: "0.0.0-dev"}, + }}) + require.NoError(t, err) + require.NoError(t, os.WriteFile(path2, data, 0o600)) + + var db2 DeploymentState + require.NoError(t, db2.Open(t.Context(), path2, WithRecovery(true), WithWrite(false))) + mustFinalize(t, &db2) +} + func TestFinalizeWithNoEntriesDoesNotWriteStateFile(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json") @@ -56,40 +115,6 @@ func TestPanicOnDoubleOpen(t *testing.T) { mustFinalize(t, &db) } -func TestHeaderOnlyWALRecoveryDoesNotAdvanceSerial(t *testing.T) { - path := filepath.Join(t.TempDir(), "state.json") - walPath := path + walSuffix - - // Commit serial 1 with one resource. - var db DeploymentState - require.NoError(t, db.Open(t.Context(), path, WithRecovery(true), WithWrite(true))) - require.NoError(t, db.SaveState("jobs.my_job", "123", map[string]string{}, nil)) - mustFinalize(t, &db) - - var committed DeploymentState - require.NoError(t, committed.Open(t.Context(), path, WithRecovery(false), WithWrite(false))) - lineage := committed.Data.Lineage - require.Equal(t, 1, committed.Data.Serial) - mustFinalize(t, &committed) - - // A deploy that opens the WAL for write but commits nothing (e.g. planning - // fails after UpgradeToWrite) leaves a header-only WAL behind, here at the - // expected serial 2. Recovering it must not advance the serial past the - // committed 1, otherwise a second such failed deploy would write its header - // at serial 3 and be rejected as ahead of the committed state. - header := Header{Lineage: lineage, Serial: 2, StateVersion: currentStateVersion} - headerLine, err := json.Marshal(header) - require.NoError(t, err) - require.NoError(t, os.WriteFile(walPath, append(headerLine, '\n'), 0o600)) - - var recovered DeploymentState - require.NoError(t, recovered.Open(t.Context(), path, WithRecovery(true), WithWrite(false))) - assert.Equal(t, 1, recovered.Data.Serial) - assert.Equal(t, "123", recovered.GetResourceID("jobs.my_job")) - assert.NoFileExists(t, walPath) - mustFinalize(t, &recovered) -} - func TestDeleteState(t *testing.T) { path := filepath.Join(t.TempDir(), "state.json")