From b685ec2042ff64ebe127eea0e44c7a066b0d10d9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 15 May 2026 11:55:38 +0200 Subject: [PATCH 01/15] Immutable folder support in DABs --- .../immutable_workspace_paths/databricks.yml | 21 ++ .../immutable_workspace_paths/out.test.toml | 3 + .../immutable_workspace_paths/output.txt | 30 +++ .../validate/immutable_workspace_paths/script | 1 + .../immutable_workspace_paths/src/main.py | 1 + .../immutable_workspace_paths/test.toml | 3 + bundle/config/bundle.go | 7 + .../mutator/resolve_variable_references.go | 24 ++ .../resolve_variable_references_test.go | 45 ++++ .../process_static_resources.go | 15 +- .../resourcemutator/resource_mutator.go | 19 +- bundle/config/mutator/translate_paths.go | 17 +- bundle/config/workspace.go | 6 + bundle/deploy/metadata/compute.go | 1 + bundle/deploy/metadata/load.go | 57 +++++ bundle/deploy/snapshot/path.go | 227 ++++++++++++++++++ bundle/deploy/snapshot/path_test.go | 136 +++++++++++ bundle/deploy/snapshot/upload.go | 69 ++++++ bundle/internal/schema/annotations.yml | 3 + bundle/metadata/metadata.go | 3 + bundle/phases/build.go | 8 + bundle/phases/deploy.go | 31 ++- bundle/phases/destroy.go | 15 +- libs/filer/snapshot_client.go | 103 ++++++++ 24 files changed, 835 insertions(+), 10 deletions(-) create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/databricks.yml create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/out.test.toml create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/output.txt create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/script create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/src/main.py create mode 100644 acceptance/bundle/validate/immutable_workspace_paths/test.toml create mode 100644 bundle/deploy/metadata/load.go create mode 100644 bundle/deploy/snapshot/path.go create mode 100644 bundle/deploy/snapshot/path_test.go create mode 100644 
bundle/deploy/snapshot/upload.go create mode 100644 libs/filer/snapshot_client.go diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml new file mode 100644 index 00000000000..39c25fb365f --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: my-bundle + immutable: true + +sync: + exclude: + # Test framework files that are not part of the bundle source. + - "repls.json" + - "user_repls.json" + - "script" + - "*.toml" + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: my_task + existing_cluster_id: "0101-120000-aaaaaaaa" + spark_python_task: + python_file: ./src/main.py diff --git a/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt new file mode 100644 index 00000000000..f6a8004bf2f --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate -o json +Warning: Pattern user_repls.json does not match any files + at sync.exclude[1] + in databricks.yml:9:7 + +{ + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts", + "current_user": { + "domain_friendly_name": "[USERNAME]", + "id": "[USERID]", + "short_name": "[USERNAME]", + "userName": "[USERNAME]" + }, + "file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files", + "resource_path": 
"/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state" + }, + "tasks": [ + { + "existing_cluster_id": "0101-120000-aaaaaaaa", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "my_task" + } + ] +} diff --git a/acceptance/bundle/validate/immutable_workspace_paths/script b/acceptance/bundle/validate/immutable_workspace_paths/script new file mode 100644 index 00000000000..df056fa9b99 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}' diff --git a/acceptance/bundle/validate/immutable_workspace_paths/src/main.py b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py new file mode 100644 index 00000000000..11b15b1a458 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/validate/immutable_workspace_paths/test.toml b/acceptance/bundle/validate/immutable_workspace_paths/test.toml new file mode 100644 index 00000000000..85e02532c93 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +Ignore = [".databricks"] diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index ce6d25bfe62..79f92b6b435 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -59,4 +59,11 @@ type Bundle struct { // A stable generated UUID for the bundle. This is normally serialized by // Databricks first party template when a user runs bundle init. Uuid string `json:"uuid,omitempty"` + + // Immutable specifies that bundle files and artifacts are uploaded as a single + // immutable snapshot rather than being synced individually. 
When true, the + // deployment calls /api/2.0/repos/snapshots with a zip containing all files + // and sets workspace.file_path and workspace.artifact_path to the returned + // content-addressed path. validate and plan make no mutative API calls. + Immutable bool `json:"immutable,omitempty"` } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 113f0576394..70a330c8a3c 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -59,6 +59,11 @@ type resolveVariableReferences struct { includeResources bool artifactsReferenceUsed bool + + // excludePaths lists variable reference paths (e.g. "workspace.file_path") whose + // resolution should be skipped. References to these paths remain unresolved so a + // later mutator can set the value and re-run resolution. + excludePaths []string } func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { @@ -74,6 +79,22 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } +// ResolveVariableReferencesOnlyResourcesExcluding resolves variable references in +// resources while leaving references to the specified paths unresolved. +// Used by ProcessStaticResources for immutable bundles so that ${workspace.snapshot_path} +// is not resolved during Initialize; it is resolved in the Deploy phase after +// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. 
+func ResolveVariableReferencesOnlyResourcesExcluding(excludePaths ...string) bundle.Mutator { + return &resolveVariableReferences{ + prefixes: defaultPrefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + pattern: dyn.NewPattern(dyn.Key("resources")), + includeResources: true, + excludePaths: excludePaths, + } +} + func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes @@ -229,6 +250,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn // Perform resolution only if the path starts with one of the specified prefixes. if slices.ContainsFunc(prefixes, path.HasPrefix) { + if slices.Contains(m.excludePaths, path.String()) { + return dyn.InvalidValue, dynvar.ErrSkipResolution + } value, err := m.lookupFn(normalized, path, b) hasUpdates = hasUpdates || (err == nil && value.IsValid()) return value, err diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 876980e9486..f682419f32a 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -6,7 +6,9 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -63,3 +65,46 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCase.assert(t, b) } } + +// TestResolveVariableReferencesExcludePaths verifies that paths listed in excludePaths +// are skipped during resolution and left as unresolved variable references. 
+// This is used for immutable bundles (see resourceVarResolver) so that +// ${workspace.file_path} and ${workspace.artifact_path} can be resolved later +// (in the Deploy phase, after snapshot.Upload() has set them from the API-assigned path). +func TestResolveVariableReferencesExcludePaths(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/snapshot/path/src/files", + ArtifactPath: "/snapshot/path/src/artifacts", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": { + JobSettings: jobs.JobSettings{ + Tasks: []jobs.Task{ + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "${workspace.file_path}/main.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + // With exclusion: ${workspace.file_path} should remain unresolved. + diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResourcesExcluding("workspace.file_path", "workspace.artifact_path")) + require.NoError(t, diags.Error()) + assert.Equal(t, "${workspace.file_path}/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, + "reference should remain unresolved when path is excluded") + + // Without exclusion: ${workspace.file_path} should resolve normally.
+ diags = bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResources()) + require.NoError(t, diags.Error()) + assert.Equal(t, "/snapshot/path/src/files/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, + "reference should resolve after exclusion is lifted") +} diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 3f859fd11cd..94c823c75b8 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -38,13 +38,26 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // we need to resolve variables because they can change path values: // - variable can be used a prefix // - path can be part of a complex variable value + + // For immutable bundles, defer resolving ${workspace.snapshot_path} in resources. + // The actual snapshot path is only known after snapshot.Upload() returns the + // API-assigned path in the deploy phase. 
+ var resourceResolver bundle.Mutator + if b.Config.Bundle.Immutable { + resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( + "workspace.snapshot_path", + ) + } else { + resourceResolver = mutator.ResolveVariableReferencesOnlyResources() + } + bundle.ApplySeqContext( ctx, b, // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + resourceResolver, // After normal variable resolution, log all ${resources.*} references mutator.LogResourceReferences(), mutator.NormalizePaths(), diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 2eb292cfbb0..31afb65ffa2 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -127,6 +127,19 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } +// resourceVarResolver returns a mutator that resolves variable references in +// resources. For immutable bundles, ${workspace.file_path} and +// ${workspace.artifact_path} are excluded: the API assigns the snapshot path +// after upload, so they must remain as-is until snapshot.Upload() has run. +func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { + if b.Config.Bundle.Immutable { + return mutator.ResolveVariableReferencesOnlyResourcesExcluding( + "workspace.file_path", "workspace.artifact_path", + ) + } + return mutator.ResolveVariableReferencesOnlyResources() +} + // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. 
@@ -139,8 +152,10 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) - // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes. + // For immutable bundles, ${workspace.file_path} and ${workspace.artifact_path} are left + // unresolved: the actual snapshot path is assigned by the API after upload, not pre-computed. + resourceVarResolver(b), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 99dd75dd787..1d38cee2361 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -320,12 +320,21 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V return dyn.NewValue(out, v.Locations()), nil } +// snapshotFilesRoot is the remote root used for file/notebook path translation +// in immutable bundles. References to this placeholder are resolved after +// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. +const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" + func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { - // Set the remote root to the sync root if source-linked deployment is enabled. - // Otherwise, set it to the workspace file path. 
- if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + switch { + case b.Config.Bundle.Immutable: + // Use a placeholder root that is resolved after snapshot.Upload() sets + // workspace.snapshot_path. This defers path computation until the actual + // content-addressed path is known. + t.remoteRoot = snapshotFilesRoot + case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath - } else { + default: t.remoteRoot = t.b.Config.Workspace.FilePath } diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 9cd397f13aa..284bd0afe02 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -80,6 +80,12 @@ type Workspace struct { // Remote workspace path for deployment state. // This defaults to "${workspace.root}/state". StatePath string `json:"state_path,omitempty"` + + // SnapshotPath is the workspace path of the immutable snapshot uploaded during + // deployment. It is set by snapshot.Upload() and used to resolve + // ${workspace.snapshot_path} references in resource configurations. + // Only populated for bundles with bundle.immutable = true. 
+ SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } type User struct { diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index cb7be9811c4..08a45d7a17c 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -105,6 +105,7 @@ func (m *compute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + b.Metadata.Config.Workspace.SnapshotPath = b.Config.Workspace.SnapshotPath // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath diff --git a/bundle/deploy/metadata/load.go b/bundle/deploy/metadata/load.go new file mode 100644 index 00000000000..3fe4eb1c3bd --- /dev/null +++ b/bundle/deploy/metadata/load.go @@ -0,0 +1,57 @@ +package metadata + +import ( + "context" + "encoding/json" + "io" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" +) + +type load struct{} + +// Load reads the metadata file written during the last deploy and populates +// fields on the bundle that are not available locally (e.g. workspace.snapshot_path +// for immutable bundles, which is only known after snapshot.Upload() ran). 
+func Load() bundle.Mutator { + return &load{} +} + +func (m *load) Name() string { + return "metadata.Load" +} + +func (m *load) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) + if err != nil { + return diag.FromErr(err) + } + + r, err := f.Read(ctx, metadataFileName) + if err != nil { + // NOTE(review): every read error is swallowed here, not only "not found". + // A missing metadata file means the bundle was never deployed or was deployed + // by an older CLI that didn't write metadata; it is a no-op so destroy can proceed. + return nil + } + defer r.Close() + + raw, err := io.ReadAll(r) + if err != nil { + return diag.FromErr(err) + } + + var md metadata.Metadata + if err := json.Unmarshal(raw, &md); err != nil { + return diag.FromErr(err) + } + + if md.Config.Workspace.SnapshotPath != "" { + b.Config.Workspace.SnapshotPath = md.Config.Workspace.SnapshotPath + } + + return nil +} diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go new file mode 100644 index 00000000000..820f20776ce --- /dev/null +++ b/bundle/deploy/snapshot/path.go @@ -0,0 +1,227 @@ +package snapshot + +import ( + "archive/zip" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "strings" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/notebook" + "github.com/databricks/cli/libs/set" +) + +// zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed +// and reproducible: the same file content always produces the same hash regardless of when +// the zip was built or the file's mtime. +var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + +// snapshotBaseDir is the workspace path under which all immutable snapshots are stored. +// The full path for a snapshot is snapshotBaseDir/{bundle_name}/{snapshot_id}.
+const snapshotBaseDir = "/Workspace/Shared/.snapshots" + +// BundleZip builds the zip that is uploaded to the snapshot API. +// It contains: +// - all files from the bundle sync root under the "files/" prefix, +// selected with the same git-aware + include/exclude logic as files.Upload +// - all built artifact files under the "artifacts/.internal/" prefix +// +// The snapshot ID is always IDFromContent(BundleZip(b)), ensuring the +// pre-calculated path and the uploaded path are derived from the same content. +func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + + if err := addSyncRootToZip(ctx, zw, b); err != nil { + return nil, err + } + if err := addArtifactsToZip(zw, b); err != nil { + return nil, err + } + + if err := zw.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// IDFromContent returns the SHA-256 hex digest of content. +func IDFromContent(content []byte) string { + h := sha256.Sum256(content) + return hex.EncodeToString(h[:]) +} + +// SnapshotID builds the bundle zip and returns its SHA-256 hex digest. +// Called after artifacts are built so that ApplyImmutableWorkspacePaths and +// snapshot.Upload both hash identical content. +func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { + content, err := BundleZip(ctx, b) + if err != nil { + return "", err + } + return IDFromContent(content), nil +} + +// SnapshotPath returns the workspace path for a snapshot: +// snapshotBaseDir/{bundleName}/{snapshotID}. +func SnapshotPath(bundleName, snapshotID string) string { + return snapshotBaseDir + "/" + bundleName + "/" + snapshotID +} + +// syncFiles returns the list of files to include in the snapshot zip using the +// same git-aware include/exclude logic as files.Upload (libs/sync). 
+func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { + // Use git.NewFileSet so that .gitignore rules are respected, matching the + // behaviour of the normal files.Upload sync path. + // Avoid passing an empty/nil paths slice: git.NewFileSet forwards it to + // fileset.New whose variadic default ("." if no args) is bypassed when the + // caller explicitly passes a nil slice. The SyncDefaultPath mutator always + // sets Sync.Paths to ["."] in the normal pipeline; we replicate that here + // so BundleZip works even when the bundle hasn't gone through the full pipeline. + var gitFS *git.FileSet + var err error + if len(b.Config.Sync.Paths) > 0 { + gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot, b.Config.Sync.Paths) + } else { + gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot) + } + if err != nil { + return nil, fmt.Errorf("build file set: %w", err) + } + + all := set.NewSetF(func(f fileset.File) string { + return f.Relative + }) + + gitFiles, err := gitFS.Files() + if err != nil { + return nil, fmt.Errorf("list sync files: %w", err) + } + all.Add(gitFiles...) + + if len(b.Config.Sync.Include) > 0 { + includeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Include) + if err != nil { + return nil, fmt.Errorf("build include set: %w", err) + } + include, err := includeFS.Files() + if err != nil { + return nil, fmt.Errorf("list include files: %w", err) + } + all.Add(include...) + } + + if len(b.Config.Sync.Exclude) > 0 { + excludeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Exclude) + if err != nil { + return nil, fmt.Errorf("build exclude set: %w", err) + } + exclude, err := excludeFS.Files() + if err != nil { + return nil, fmt.Errorf("list exclude files: %w", err) + } + for _, f := range exclude { + all.Remove(f) + } + } + + files := all.Iter() + // Sort for a stable zip (same content → same hash regardless of map iteration order). 
+ slices.SortFunc(files, func(a, b fileset.File) int { + if a.Relative < b.Relative { + return -1 + } + if a.Relative > b.Relative { + return 1 + } + return 0 + }) + return files, nil +} + +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { + files, err := syncFiles(ctx, b) + if err != nil { + return err + } + + for _, f := range files { + rc, err := b.SyncRoot.Open(f.Relative) + if err != nil { + return fmt.Errorf("open %s: %w", f.Relative, err) + } + + // Notebooks are stored without their file extension, matching how + // Databricks workspace imports them (e.g. sample_notebook.ipynb → + // sample_notebook). Job tasks reference the extension-stripped path. + entryPath := filepath.ToSlash(f.Relative) + if isNb, _, nbErr := notebook.DetectWithFS(b.SyncRoot, f.Relative); nbErr == nil && isNb { + entryPath = strings.TrimSuffix(entryPath, filepath.Ext(entryPath)) + } + + h := &zip.FileHeader{ + Name: "files/" + entryPath, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + rc.Close() + return fmt.Errorf("zip entry for %s: %w", f.Relative, err) + } + _, err = io.Copy(w, rc) + rc.Close() + if err != nil { + return fmt.Errorf("write %s: %w", f.Relative, err) + } + } + return nil +} + +func addArtifactsToZip(zw *zip.Writer, b *bundle.Bundle) error { + for _, artifact := range b.Config.Artifacts { + for _, af := range artifact.Files { + source := af.Source + if af.Patched != "" { + source = af.Patched + } + // ".internal" matches libraries.InternalDirName so that ReplaceWithRemotePath + // produces library paths that resolve correctly inside the snapshot. 
+ if err := addLocalFileToZip(zw, source, "artifacts/.internal"); err != nil { + return err + } + } + } + return nil +} + +func addLocalFileToZip(zw *zip.Writer, localPath, zipPrefix string) error { + f, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("open %s: %w", localPath, err) + } + defer f.Close() + + entryName := zipPrefix + "/" + filepath.Base(localPath) + h := &zip.FileHeader{ + Name: entryName, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + return fmt.Errorf("zip entry %s: %w", entryName, err) + } + _, err = io.Copy(w, f) + return err +} diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go new file mode 100644 index 00000000000..f5bc2ab610c --- /dev/null +++ b/bundle/deploy/snapshot/path_test.go @@ -0,0 +1,136 @@ +package snapshot_test + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "slices" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func makeBundleWithFiles(t *testing.T, files map[string]string) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for name, content := range files { + p := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755)) + require.NoError(t, os.WriteFile(p, []byte(content), 0o644)) + } + root := vfs.MustNew(dir) + return &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + // WorktreeRoot = SyncRoot is the fallback used by LoadGitDetails when + // there is no git repository. 
+ WorktreeRoot: root, + Config: config.Root{}, + } +} + +func TestBundleZipIsDeterministic(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "src/task.py": "def run(): pass", + }) + + zip1, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, zip1, zip2, "BundleZip must produce identical bytes for identical content") +} + +func TestBundleZipChangesWithContent(t *testing.T) { + b1 := makeBundleWithFiles(t, map[string]string{"main.py": "v1"}) + b2 := makeBundleWithFiles(t, map[string]string{"main.py": "v2"}) + + zip1, err := snapshot.BundleZip(t.Context(), b1) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b2) + require.NoError(t, err) + + assert.NotEqual(t, zip1, zip2, "different file content must produce different zips") +} + +func TestBundleZipRespectsExcludes(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude.Config.Sync.Exclude = []string{"*.json"} + + zipAll, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zipExcl, err := snapshot.BundleZip(t.Context(), bExclude) + require.NoError(t, err) + + // The zip without the excluded file should be smaller and different. 
+ assert.NotEqual(t, zipAll, zipExcl) + assert.Less(t, len(zipExcl), len(zipAll)) +} + +func TestIDFromContent(t *testing.T) { + id := snapshot.IDFromContent([]byte("hello")) + // SHA-256 of "hello" + assert.Equal(t, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", id) + assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") +} + +func TestSnapshotPath(t *testing.T) { + p := snapshot.SnapshotPath("my-bundle", "abc123") + assert.Equal(t, "/Workspace/Shared/.snapshots/my-bundle/abc123", p) +} + +func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + expectedID := snapshot.IDFromContent(zipContent) + + id, err := snapshot.SnapshotID(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, expectedID, id) +} + +func zipEntryNames(t *testing.T, zipContent []byte) []string { + t.Helper() + r, err := zip.NewReader(bytes.NewReader(zipContent), int64(len(zipContent))) + require.NoError(t, err) + names := make([]string, len(r.File)) + for i, f := range r.File { + names[i] = f.Name + } + return names +} + +func TestBundleZipStripsNotebookExtensions(t *testing.T) { + // Minimal valid Jupyter notebook content. 
+ ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` + b := makeBundleWithFiles(t, map[string]string{ + "src/my_notebook.ipynb": ipynb, + "src/script.py": "print('hello')", + }) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + names := zipEntryNames(t, zipContent) + assert.True(t, slices.Contains(names, "files/src/my_notebook"), "notebook should have extension stripped") + assert.False(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should not appear with .ipynb extension") + assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") +} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go new file mode 100644 index 00000000000..73d84eb9703 --- /dev/null +++ b/bundle/deploy/snapshot/upload.go @@ -0,0 +1,69 @@ +package snapshot + +import ( + "context" + "fmt" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/log" +) + +type snapshotUpload struct { + // uploader allows test injection of a custom SnapshotUploader. + uploader filer.SnapshotUploader +} + +// Upload returns a mutator that builds the bundle zip, uploads it via +// /api/2.0/repos/snapshots, and updates workspace.file_path and +// workspace.artifact_path to the content-addressed location returned by the API. +func Upload() bundle.Mutator { + return &snapshotUpload{} +} + +// UploadWithClient returns an upload mutator backed by the provided SnapshotUploader. +// This is intended for use in tests. 
+func UploadWithClient(uploader filer.SnapshotUploader) bundle.Mutator { + return &snapshotUpload{uploader: uploader} +} + +func (m *snapshotUpload) Name() string { + return "snapshot.Upload" +} + +func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + uploader := m.uploader + if uploader == nil { + var err error + uploader, err = filer.NewSnapshotUploader(b.WorkspaceClient(ctx)) + if err != nil { + return diag.FromErr(err) + } + } + + cmdio.LogString(ctx, "Uploading immutable bundle snapshot...") + + zipContent, err := BundleZip(ctx, b) + if err != nil { + return diag.FromErr(fmt.Errorf("failed to build snapshot zip: %w", err)) + } + snapshotID := IDFromContent(zipContent) + log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) + + info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, b.Config.Workspace.CurrentUser.UserName, zipContent) + if err != nil { + return diag.FromErr(err) + } + + log.Infof(ctx, "Snapshot uploaded to %s", info.Path) + + // The API unpacks the zip under a "src" subdirectory. + b.Config.Workspace.SnapshotPath = info.Path + b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files") + b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + + return nil +} diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 2f28ca27596..e63f6625d1d 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -50,6 +50,9 @@ github.com/databricks/cli/bundle/config.Bundle: The Git version control details that are associated with your bundle. "markdown_description": |- The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). + "immutable": + "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. 
When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "name": "description": |- The name of the bundle. diff --git a/bundle/metadata/metadata.go b/bundle/metadata/metadata.go index 1c61cb093f0..e5549472696 100644 --- a/bundle/metadata/metadata.go +++ b/bundle/metadata/metadata.go @@ -15,6 +15,9 @@ type Bundle struct { type Workspace struct { FilePath string `json:"file_path"` + // SnapshotPath is the workspace path of the immutable snapshot uploaded + // during deployment. Only populated for bundles with bundle.immutable = true. + SnapshotPath string `json:"snapshot_path,omitempty"` } type Resource struct { diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 5a32435f8f1..a0649e373f0 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/libs/logdiag" ) + type LibLocationMap map[string][]libraries.LocationToUpdate // The build phase builds artifacts. @@ -24,6 +25,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { scripts.Execute(config.ScriptPreBuild), artifacts.Build(), scripts.Execute(config.ScriptPostBuild), + mutator.ResolveVariableReferencesWithoutResources( "artifacts", ), @@ -41,6 +43,12 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) + // For immutable bundles, library remote paths are set in the deploy phase + // after snapshot.Upload() provides the content-addressed workspace.artifact_path. 
+ if b.Config.Bundle.Immutable { + return nil + } + libs, diags := libraries.ReplaceWithRemotePath(ctx, b) for _, diag := range diags { logdiag.LogDiag(ctx, diag) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index b4d70ede5ad..5534354bc61 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,10 +8,12 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -131,13 +133,38 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - uploadLibraries(ctx, b, libs) + if b.Config.Bundle.Immutable { + // Upload all source files and built artifacts as a single immutable snapshot. + // The API assigns a content-addressed path, so workspace.snapshot_path (and + // derived workspace.file_path / workspace.artifact_path) are only known after + // upload. Resolve variable references in resources and set library remote paths + // once the actual paths are available. 
+ bundle.ApplySeqContext(ctx, b, + snapshot.Upload(), + mutator.ResolveVariableReferencesOnlyResources(), + ) + if !logdiag.HasError(ctx) { + _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range libDiags { + logdiag.LogDiag(ctx, d) + } + } + } else { + uploadLibraries(ctx, b, libs) + } + if logdiag.HasError(ctx) { return } + if !b.Config.Bundle.Immutable { + bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) + if logdiag.HasError(ctx) { + return + } + } + bundle.ApplySeqContext(ctx, b, - files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 91640ac6cad..71d5468c253 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" + deploymetadata "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -123,17 +124,29 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { }() if !engine.IsDirect() { - bundle.ApplySeqContext(ctx, b, + mutators := []bundle.Mutator{ // We need to resolve artifact variable (how we do it in build phase) // because some of the to-be-destroyed resource might use this variable. // Not resolving might lead to terraform "Reference to undeclared resource" error mutator.ResolveVariableReferencesWithoutResources("artifacts"), mutator.ResolveVariableReferencesOnlyResources("artifacts"), + } + + if b.Config.Bundle.Immutable { + // For immutable bundles, resource paths contain ${workspace.snapshot_path} + // which was set during deploy by snapshot.Upload(). 
Load it from the stored + // metadata so it can be resolved before Terraform processes the config. + mutators = append([]bundle.Mutator{deploymetadata.Load()}, mutators...) + mutators = append(mutators, mutator.ResolveVariableReferencesOnlyResources()) + } + mutators = append(mutators, terraform.Interpolate(), terraform.Write(), terraform.Plan(terraform.PlanGoal("destroy")), ) + + bundle.ApplySeqContext(ctx, b, mutators...) } if logdiag.HasError(ctx) { diff --git a/libs/filer/snapshot_client.go b/libs/filer/snapshot_client.go new file mode 100644 index 00000000000..a7d84891cb0 --- /dev/null +++ b/libs/filer/snapshot_client.go @@ -0,0 +1,103 @@ +package filer + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "mime/multipart" + "net/http" + "net/textproto" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/client" +) + +// SnapshotInfo holds the result of a successful snapshot upload. +type SnapshotInfo struct { + // Path is the immutable workspace path for the uploaded snapshot content. + Path string +} + +// SnapshotUploader abstracts the /api/2.0/repos/snapshots endpoint. +// snapshotID is the content-addressed key supplied by the caller; the API uses +// it as the final path component so that identical content always resolves to +// the same workspace location. +// This interface exists so the implementation can later be replaced with a Go SDK call. +type SnapshotUploader interface { + Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) +} + +// snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. +type snapshotAPIClient struct { + apiClient apiClient +} + +// snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. 
+type snapshotUploadResponse struct { + Snapshot struct { + Path string `json:"path"` + } `json:"snapshot"` +} + +// NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. +func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { + apiClient, err := client.New(w.Config) + if err != nil { + return nil, err + } + return &snapshotAPIClient{apiClient: apiClient}, nil +} + +// Upload uploads zipContent as an immutable snapshot identified by snapshotID. +// snapshotID is the SHA-256 of the files-only zip and is used by the server as +// the content-addressed path component. currentUser is granted CAN_READ on the snapshot. +func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) { + var body bytes.Buffer + mw := multipart.NewWriter(&body) + + if err := mw.WriteField("snapshot_id", snapshotID); err != nil { + return nil, fmt.Errorf("failed to write snapshot_id: %w", err) + } + if err := mw.WriteField("bundle_id", bundleID); err != nil { + return nil, fmt.Errorf("failed to write bundle_id: %w", err) + } + + // The API requires an access_control_list granting the current user read access. + acl, err := json.Marshal([]map[string]string{ + {"user_name": currentUser, "permission_level": "CAN_READ"}, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal access_control_list: %w", err) + } + if err := mw.WriteField("access_control_list", string(acl)); err != nil { + return nil, fmt.Errorf("failed to write access_control_list: %w", err) + } + + // Attach the zip with an explicit content-type so the server treats it as binary. 
+ fh := make(textproto.MIMEHeader) + fh.Set("Content-Disposition", `form-data; name="file"; filename="snapshot.zip"`) + fh.Set("Content-Type", "application/zip") + part, err := mw.CreatePart(fh) + if err != nil { + return nil, fmt.Errorf("failed to create file part: %w", err) + } + if _, err := part.Write(zipContent); err != nil { + return nil, fmt.Errorf("failed to write zip content: %w", err) + } + if err := mw.Close(); err != nil { + return nil, fmt.Errorf("failed to finalize multipart body: %w", err) + } + + headers := map[string]string{ + "Content-Type": mw.FormDataContentType(), + } + + var resp snapshotUploadResponse + err = c.apiClient.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + if err != nil { + return nil, fmt.Errorf("snapshot upload: %w", err) + } + + return &SnapshotInfo{Path: resp.Snapshot.Path}, nil +} From a429b263c19ef84f93c4657997848876d6f72a68 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 11:03:38 +0200 Subject: [PATCH 02/15] remove unused snapshot path method --- bundle/deploy/snapshot/path.go | 10 ---------- bundle/deploy/snapshot/path_test.go | 5 ----- 2 files changed, 15 deletions(-) diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 820f20776ce..3be2d195a8c 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -26,10 +26,6 @@ import ( // the zip was built or the file's mtime. var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) -// snapshotBaseDir is the workspace path under which all immutable snapshots are stored. -// The full path for a snapshot is snapshotBaseDir/{bundle_name}/{snapshot_id}. -const snapshotBaseDir = "/Workspace/Shared/.snapshots" - // BundleZip builds the zip that is uploaded to the snapshot API. 
// It contains: // - all files from the bundle sync root under the "files/" prefix, @@ -72,12 +68,6 @@ func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { return IDFromContent(content), nil } -// SnapshotPath returns the workspace path for a snapshot: -// snapshotBaseDir/{bundleName}/{snapshotID}. -func SnapshotPath(bundleName, snapshotID string) string { - return snapshotBaseDir + "/" + bundleName + "/" + snapshotID -} - // syncFiles returns the list of files to include in the snapshot zip using the // same git-aware include/exclude logic as files.Upload (libs/sync). func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index f5bc2ab610c..157cdf2c9ba 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -89,11 +89,6 @@ func TestIDFromContent(t *testing.T) { assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") } -func TestSnapshotPath(t *testing.T) { - p := snapshot.SnapshotPath("my-bundle", "abc123") - assert.Equal(t, "/Workspace/Shared/.snapshots/my-bundle/abc123", p) -} - func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) From e7c196889775ff31d0a9f05a78254bc94935eded Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:00:22 +0200 Subject: [PATCH 03/15] added an acceptance test --- .../deploy/immutable/databricks.yml.tmpl | 32 +++++++++++ .../bundle/deploy/immutable/out.test.toml | 3 + acceptance/bundle/deploy/immutable/output.txt | 56 +++++++++++++++++++ .../bundle/deploy/immutable/pyproject.toml | 31 ++++++++++ acceptance/bundle/deploy/immutable/script | 17 ++++++ .../immutable/src/immutable/__init__.py | 0 .../deploy/immutable/src/immutable/main.py | 6 ++ .../bundle/deploy/immutable/src/main.py | 1 + .../bundle/deploy/immutable/src/notebook.py | 3 + acceptance/bundle/deploy/immutable/test.toml 
| 10 ++++ 10 files changed, 159 insertions(+) create mode 100644 acceptance/bundle/deploy/immutable/databricks.yml.tmpl create mode 100644 acceptance/bundle/deploy/immutable/out.test.toml create mode 100644 acceptance/bundle/deploy/immutable/output.txt create mode 100644 acceptance/bundle/deploy/immutable/pyproject.toml create mode 100644 acceptance/bundle/deploy/immutable/script create mode 100644 acceptance/bundle/deploy/immutable/src/immutable/__init__.py create mode 100644 acceptance/bundle/deploy/immutable/src/immutable/main.py create mode 100644 acceptance/bundle/deploy/immutable/src/main.py create mode 100644 acceptance/bundle/deploy/immutable/src/notebook.py create mode 100644 acceptance/bundle/deploy/immutable/test.toml diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..23805de2402 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -0,0 +1,32 @@ +bundle: + name: test-bundle-immutable-$UNIQUE_NAME + immutable: true + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + - task_key: python_wheel_task + python_wheel_task: + package_name: immutable + entry_point: main + environment_key: env + environments: + - environment_key: env + spec: + environment_version: "4" + dependencies: + - ./dist/*.whl diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml new file mode 100644 index 00000000000..650836edeb3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = false +Cloud = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git 
a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt new file mode 100644 index 00000000000..5ebc1c24ae0 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -0,0 +1,56 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +[ + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" +] + +>>> [CLI] bundle run my_job +Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] + +[TIMESTAMP] "my job" RUNNING +[TIMESTAMP] "my job" INTERNAL_ERROR FAILED Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. 
+Task notebook_task FAILED: +Run failed with error message + Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. + + +Error: Task notebook_task failed! +Error: +Run failed with error message + Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. +Trace: + +Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
+ +Exit code: 1 diff --git a/acceptance/bundle/deploy/immutable/pyproject.toml b/acceptance/bundle/deploy/immutable/pyproject.toml new file mode 100644 index 00000000000..3e49b180137 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/pyproject.toml @@ -0,0 +1,31 @@ +[project] +name = "immutable" +version = "0.0.1" +authors = [{ name = "andrew.nester@databricks.com" }] +requires-python = ">=3.10,<3.13" +dependencies = [ + # Any dependencies for jobs and pipelines in this project can be added here + # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies + # + # LIMITATION: for pipelines, dependencies are cached during development; + # add dependencies to the 'environment' section of your pipeline.yml file instead +] + +[dependency-groups] +dev = [ + "pytest", + "ruff", + "databricks-dlt", + "databricks-connect>=15.4,<15.5", + "ipykernel", +] + +[project.scripts] +main = "immutable.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.ruff] +line-length = 120 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script new file mode 100644 index 00000000000..57d5cfbf3ec --- /dev/null +++ b/acceptance/bundle/deploy/immutable/script @@ -0,0 +1,17 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths are immutable +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' + +trace $CLI bundle run my_job diff --git 
a/acceptance/bundle/deploy/immutable/src/immutable/__init__.py b/acceptance/bundle/deploy/immutable/src/immutable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acceptance/bundle/deploy/immutable/src/immutable/main.py b/acceptance/bundle/deploy/immutable/src/immutable/main.py new file mode 100644 index 00000000000..9eccd00150d --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/immutable/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from Python Wheel Task!") + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/deploy/immutable/src/main.py b/acceptance/bundle/deploy/immutable/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable/src/notebook.py b/acceptance/bundle/deploy/immutable/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml new file mode 100644 index 00000000000..be964aff9b5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -0,0 +1,10 @@ +Local = false +Cloud = true + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] From 67914d073fdcc811c31cb4aaae024cfd5ce40e34 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:18:28 +0200 Subject: [PATCH 04/15] fix for notebook import --- acceptance/bundle/deploy/immutable/output.txt | 28 +++++++++---------- bundle/deploy/snapshot/path.go | 9 ------ bundle/phases/build.go | 1 - 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 
5ebc1c24ae0..5b9e74d23d0 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -16,33 +16,33 @@ Updating deployment state... Deployment complete! >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/main.py" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/main.py" >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/notebook" >>> [CLI] jobs get [NUMID] [ - "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" ] >>> [CLI] bundle run my_job Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] [TIMESTAMP] "my job" RUNNING -[TIMESTAMP] "my job" INTERNAL_ERROR FAILED Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. 
-Task notebook_task FAILED: -Run failed with error message - Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. +[TIMESTAMP] "my job" TERMINATED SUCCESS +Output: +======= +Task python_wheel_task: +Hello from Python Wheel Task! +======= +Task notebook_task: -Error: Task notebook_task failed! -Error: -Run failed with error message - Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. -Trace: +======= +Task spark_python_task: +Hello from Spark Python Task! -Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task notebook_task failed with message: Unexpected failure while fetching notebook at /Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook: '/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/73e0ad2064ac9c[NUMID]bd827ea506a8437f37516d3ba66ffa34a90276/src/files/src/notebook' is not a notebook. >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: @@ -52,5 +52,3 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! 
- -Exit code: 1 diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 3be2d195a8c..82c5d543680 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -11,13 +11,11 @@ import ( "os" "path/filepath" "slices" - "strings" "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/git" - "github.com/databricks/cli/libs/notebook" "github.com/databricks/cli/libs/set" ) @@ -151,14 +149,7 @@ func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) err return fmt.Errorf("open %s: %w", f.Relative, err) } - // Notebooks are stored without their file extension, matching how - // Databricks workspace imports them (e.g. sample_notebook.ipynb → - // sample_notebook). Job tasks reference the extension-stripped path. entryPath := filepath.ToSlash(f.Relative) - if isNb, _, nbErr := notebook.DetectWithFS(b.SyncRoot, f.Relative); nbErr == nil && isNb { - entryPath = strings.TrimSuffix(entryPath, filepath.Ext(entryPath)) - } - h := &zip.FileHeader{ Name: "files/" + entryPath, Method: zip.Deflate, diff --git a/bundle/phases/build.go b/bundle/phases/build.go index a0649e373f0..26dc9bede06 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,7 +14,6 @@ import ( "github.com/databricks/cli/libs/logdiag" ) - type LibLocationMap map[string][]libraries.LocationToUpdate // The build phase builds artifacts. 
From 549492a3b44d577b67afdd135a3752f1c04cb0b7 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 28 May 2026 12:40:53 +0200 Subject: [PATCH 05/15] removed unused function --- bundle/deploy/snapshot/upload.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 73d84eb9703..e3e9efa011e 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -24,12 +24,6 @@ func Upload() bundle.Mutator { return &snapshotUpload{} } -// UploadWithClient returns an upload mutator backed by the provided SnapshotUploader. -// This is intended for use in tests. -func UploadWithClient(uploader filer.SnapshotUploader) bundle.Mutator { - return &snapshotUpload{uploader: uploader} -} - func (m *snapshotUpload) Name() string { return "snapshot.Upload" } From aedfdb03398a68a5b7d42dbacd8b194a1cb8db14 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 1 Jun 2026 13:38:31 +0200 Subject: [PATCH 06/15] fix schema + unit test --- bundle/deploy/snapshot/path_test.go | 5 ++--- bundle/schema/jsonschema.json | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index 157cdf2c9ba..9728fadb507 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -113,7 +113,7 @@ func zipEntryNames(t *testing.T, zipContent []byte) []string { return names } -func TestBundleZipStripsNotebookExtensions(t *testing.T) { +func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { // Minimal valid Jupyter notebook content. 
ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` b := makeBundleWithFiles(t, map[string]string{ @@ -125,7 +125,6 @@ func TestBundleZipStripsNotebookExtensions(t *testing.T) { require.NoError(t, err) names := zipEntryNames(t, zipContent) - assert.True(t, slices.Contains(names, "files/src/my_notebook"), "notebook should have extension stripped") - assert.False(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should not appear with .ipynb extension") + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should have extension stripped") assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index ee105a6f821..28ffeb6205f 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2239,6 +2239,10 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, + "immutable": { + "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. 
The validate and plan commands make no mutative API calls when this is enabled.", + "$ref": "#/$defs/bool" + }, "name": { "description": "The name of the bundle.", "$ref": "#/$defs/string" From eddec613463ac23a100d7f9da8c1ecf6d2a533e9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 12:49:24 +0200 Subject: [PATCH 07/15] use immutable_folder config --- acceptance/bundle/deploy/immutable/databricks.yml.tmpl | 3 ++- .../validate/immutable_workspace_paths/databricks.yml | 3 ++- bundle/config/bundle.go | 6 ------ bundle/config/deployment.go | 7 +++++++ .../mutator/resourcemutator/process_static_resources.go | 2 +- bundle/config/mutator/resourcemutator/resource_mutator.go | 2 +- bundle/config/mutator/translate_paths.go | 2 +- bundle/internal/schema/annotations.yml | 6 +++--- bundle/phases/build.go | 2 +- bundle/phases/deploy.go | 4 ++-- bundle/phases/destroy.go | 2 +- bundle/schema/jsonschema.json | 8 ++++---- 12 files changed, 25 insertions(+), 22 deletions(-) diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl index 23805de2402..6dd62eee1fb 100644 --- a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -1,6 +1,7 @@ bundle: name: test-bundle-immutable-$UNIQUE_NAME - immutable: true + deployment: + immutable_folder: true artifacts: python_artifact: diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml index 39c25fb365f..b6ce1bd97d1 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -1,6 +1,7 @@ bundle: name: my-bundle - immutable: true + deployment: + immutable_folder: true sync: exclude: diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 79f92b6b435..473e355225f 100644 --- 
a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -60,10 +60,4 @@ type Bundle struct { // Databricks first party template when a user runs bundle init. Uuid string `json:"uuid,omitempty"` - // Immutable specifies that bundle files and artifacts are uploaded as a single - // immutable snapshot rather than being synced individually. When true, the - // deployment calls /api/2.0/repos/snapshots with a zip containing all files - // and sets workspace.file_path and workspace.artifact_path to the returned - // content-addressed path. validate and plan make no mutative API calls. - Immutable bool `json:"immutable,omitempty"` } diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index b7efb4456f9..8be0596823e 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -5,6 +5,13 @@ type Deployment struct { // running jobs or pipelines in the workspace. Defaults to false. FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` + // ImmutableFolder specifies that bundle files and artifacts are uploaded as a + // single immutable snapshot rather than being synced individually. When true, + // the deployment calls /api/2.0/repos/snapshots with a zip containing all files + // and sets workspace.file_path and workspace.artifact_path to the returned + // content-addressed path. validate and plan make no mutative API calls. + ImmutableFolder bool `json:"immutable_folder,omitempty"` + // Lock configures locking behavior on deployment. 
Lock Lock `json:"lock,omitempty"` } diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 94c823c75b8..9040c14ee3d 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -43,7 +43,7 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // The actual snapshot path is only known after snapshot.Upload() returns the // API-assigned path in the deploy phase. var resourceResolver bundle.Mutator - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( "workspace.snapshot_path", ) diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 31afb65ffa2..fdb813aba47 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -132,7 +132,7 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { // ${workspace.artifact_path} are excluded: the API assigns the snapshot path // after upload, so they must remain as-is until snapshot.Upload() has run. 
func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { return mutator.ResolveVariableReferencesOnlyResourcesExcluding( "workspace.file_path", "workspace.artifact_path", ) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 1d38cee2361..66605e69be7 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -327,7 +327,7 @@ const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { - case b.Config.Bundle.Immutable: + case b.Config.Bundle.Deployment.ImmutableFolder: // Use a placeholder root that is resolved after snapshot.Upload() sets // workspace.snapshot_path. This defers path computation until the actual // content-addressed path is known. diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e63f6625d1d..735d0cc4c81 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -50,9 +50,6 @@ github.com/databricks/cli/bundle/config.Bundle: The Git version control details that are associated with your bundle. "markdown_description": |- The Git version control details that are associated with your bundle. For supported attributes see [\_](/dev-tools/bundles/settings.md#git). - "immutable": - "description": |- - Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "name": "description": |- The name of the bundle. 
@@ -63,6 +60,9 @@ github.com/databricks/cli/bundle/config.Deployment: "fail_on_active_runs": "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + "immutable_folder": + "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 26dc9bede06..8411e376ebb 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -44,7 +44,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { return nil } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 5534354bc61..6eee62a38ec 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -133,7 +133,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { // Upload all source files and built artifacts as a single immutable snapshot. 
// The API assigns a content-addressed path, so workspace.snapshot_path (and // derived workspace.file_path / workspace.artifact_path) are only known after @@ -157,7 +157,7 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - if !b.Config.Bundle.Immutable { + if !b.Config.Bundle.Deployment.ImmutableFolder { bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) if logdiag.HasError(ctx) { return diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 71d5468c253..a2890d56f5a 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -132,7 +132,7 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { mutator.ResolveVariableReferencesOnlyResources("artifacts"), } - if b.Config.Bundle.Immutable { + if b.Config.Bundle.Deployment.ImmutableFolder { // For immutable bundles, resource paths contain ${workspace.snapshot_path} // which was set during deploy by snapshot.Upload(). Load it from the stored // metadata so it can be resolved before Terraform processes the config. diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 28ffeb6205f..50f5045cf70 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2239,10 +2239,6 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, - "immutable": { - "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. 
The validate and plan commands make no mutative API calls when this is enabled.", - "$ref": "#/$defs/bool" - }, "name": { "description": "The name of the bundle.", "$ref": "#/$defs/string" @@ -2275,6 +2271,10 @@ "description": "Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted.", "$ref": "#/$defs/bool" }, + "immutable_folder": { + "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled.", + "$ref": "#/$defs/bool" + }, "lock": { "description": "The deployment lock attributes.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" From 4a9bcd9fd49c7eb39eaec34d078284eb558fa025 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 12:52:49 +0200 Subject: [PATCH 08/15] remove merge conflict --- .../config/mutator/resourcemutator/process_static_resources.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 9040c14ee3d..8b8dcbbe3b8 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -58,8 +58,6 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes resourceResolver, - // After normal variable resolution, log all ${resources.*} references - mutator.LogResourceReferences(), mutator.NormalizePaths(), // Translate dashboard paths into paths in the 
workspace file system From ebd26ea28e4e8448cc740fdf2ac017ac1baa8a6a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 13:13:05 +0200 Subject: [PATCH 09/15] fix empty artifact path + tests --- .../databricks.yml.tmpl | 16 ++++++++++ .../immutable-no-artifacts/out.test.toml | 3 ++ .../deploy/immutable-no-artifacts/output.txt | 30 +++++++++++++++++++ .../deploy/immutable-no-artifacts/script | 14 +++++++++ .../deploy/immutable-no-artifacts/src/main.py | 1 + .../immutable-no-artifacts/src/notebook.py | 3 ++ .../deploy/immutable-no-artifacts/test.toml | 16 ++++++++++ bundle/deploy/snapshot/upload.go | 6 +++- 8 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/output.txt create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/script create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/src/main.py create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py create mode 100644 acceptance/bundle/deploy/immutable-no-artifacts/test.toml diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl new file mode 100644 index 00000000000..ac83678a5c0 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -0,0 +1,16 @@ +bundle: + name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME + deployment: + immutable_folder: true + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml 
b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml new file mode 100644 index 00000000000..650836edeb3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -0,0 +1,3 @@ +Local = false +Cloud = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt new file mode 100644 index 00000000000..52398f774a3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script new file mode 100644 index 00000000000..21541fb79ee --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -0,0 +1,14 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths point into the snapshot +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml new file mode 100644 index 00000000000..8cdb1604a3a --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -0,0 +1,16 @@ +Local = false +Cloud = true + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so it doesn't 
need to be +# hardcoded in output.txt and the test stays stable across file changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index e3e9efa011e..4eea5757cdd 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -57,7 +57,11 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // The API unpacks the zip under a "src" subdirectory. b.Config.Workspace.SnapshotPath = info.Path b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files") - b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + // Only set artifact_path when artifacts are present; with no artifacts the + // zip has no "src/artifacts" directory and a get-status on it would 404. + if len(b.Config.Artifacts) > 0 { + b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + } return nil } From 5efe1da3130352b65e86c3b3f913fe5f1a5eeaa3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 9 Jun 2026 15:08:39 +0200 Subject: [PATCH 10/15] fixed test config --- .../deploy/immutable-no-artifacts/databricks.yml.tmpl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl index ac83678a5c0..caacf79f907 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -11,6 +11,12 @@ resources: - task_key: spark_python_task spark_python_task: python_file: ./src/main.py + environment_key: env - task_key: notebook_task notebook_task: notebook_path: ./src/notebook.py + + environments: + - environment_key: env + spec: + environment_version: "4" From 6215a49ecc85258237d86b5fe921f80b71b54969 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 16:06:25 +0200 Subject: [PATCH 
11/15] fixes --- .../immutable-no-artifacts/out.test.toml | 2 +- .../deploy/immutable-no-artifacts/test.toml | 2 +- acceptance/bundle/deploy/immutable/output.txt | 21 ++--- acceptance/bundle/deploy/immutable/script | 3 +- acceptance/bundle/deploy/immutable/test.toml | 6 ++ .../immutable_workspace_paths/output.txt | 4 +- .../mutator/resolve_variable_references.go | 15 ---- .../resolve_variable_references_test.go | 45 ----------- .../process_static_resources.go | 12 +-- .../resourcemutator/resource_mutator.go | 19 +---- bundle/config/mutator/translate_paths.go | 12 +-- bundle/config/workspace.go | 6 +- bundle/deploy/metadata/load.go | 57 -------------- .../deploy/snapshot/client.go | 12 +-- bundle/deploy/snapshot/path.go | 76 +++---------------- bundle/deploy/snapshot/path_test.go | 2 +- bundle/deploy/snapshot/state.go | 63 +++++++++++++++ bundle/deploy/snapshot/translate_paths.go | 50 ++++++++++++ bundle/deploy/snapshot/upload.go | 5 +- bundle/phases/build.go | 20 ++--- bundle/phases/deploy.go | 15 ++-- bundle/phases/destroy.go | 12 +-- cmd/bundle/utils/process.go | 1 - libs/sync/sync.go | 29 +++++++ libs/testserver/handlers.go | 43 +++++++++++ 25 files changed, 254 insertions(+), 278 deletions(-) delete mode 100644 bundle/deploy/metadata/load.go rename libs/filer/snapshot_client.go => bundle/deploy/snapshot/client.go (91%) create mode 100644 bundle/deploy/snapshot/state.go create mode 100644 bundle/deploy/snapshot/translate_paths.go diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml index 650836edeb3..bbc7fcfd1bd 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -1,3 +1,3 @@ -Local = false +Local = true Cloud = true EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml 
b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml index 8cdb1604a3a..21acfa23bd6 100644 --- a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -1,4 +1,4 @@ -Local = false +Local = true Cloud = true Ignore = [ diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt index 5b9e74d23d0..5fc2ed07493 100644 --- a/acceptance/bundle/deploy/immutable/output.txt +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -16,33 +16,22 @@ Updating deployment state... Deployment complete! >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/main.py" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" >>> [CLI] jobs get [NUMID] -"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/files/src/notebook" +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" >>> [CLI] jobs get [NUMID] [ - "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/11f80ca6d8923bf75b57e475d4ca9ba4bb1d6d48c58aace8d3f2a1289b51c6e0/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" ] >>> [CLI] bundle run my_job +script: line 182: sort_lines: command not found Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] [TIMESTAMP] "my job" RUNNING [TIMESTAMP] "my job" TERMINATED SUCCESS -Output: -======= -Task python_wheel_task: -Hello from Python Wheel Task! - -======= -Task notebook_task: - -======= -Task spark_python_task: -Hello from Spark Python Task! 
- >>> [CLI] bundle destroy --auto-approve The following resources will be deleted: @@ -52,3 +41,5 @@ All files and directories at the following location will be deleted: /Workspace/ Deleting files... Destroy complete! + +Exit code: 127 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script index 57d5cfbf3ec..c783212308a 100644 --- a/acceptance/bundle/deploy/immutable/script +++ b/acceptance/bundle/deploy/immutable/script @@ -14,4 +14,5 @@ trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_pyt trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' -trace $CLI bundle run my_job +# Sort output to make it stable +trace $CLI bundle run my_job | sort_lines diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml index be964aff9b5..a97b714bea8 100644 --- a/acceptance/bundle/deploy/immutable/test.toml +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -2,9 +2,15 @@ Local = false Cloud = true Ignore = [ + "dist", "databricks.yml", ".databricks", ".venv", "script", "*.pyc", ] + +[[Repls]] +# Replace snapshot hash with SNAPSHOT_HASH +Old = "[0-9a-f]{64}" +New = "[SNAPSHOT_HASH]" diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt index f6a8004bf2f..da1a2f05768 100644 --- a/acceptance/bundle/validate/immutable_workspace_paths/output.txt +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -2,7 +2,7 @@ >>> [CLI] bundle validate -o json Warning: Pattern user_repls.json does not match any files at sync.exclude[1] - in databricks.yml:9:7 + in databricks.yml:10:7 { "workspace": { @@ -22,7 +22,7 @@ Warning: Pattern user_repls.json does not match any files { "existing_cluster_id": "0101-120000-aaaaaaaa", 
"spark_python_task": { - "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + "python_file": "[TEST_TMP_DIR]/src/main.py" }, "task_key": "my_task" } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 70a330c8a3c..4da02a31b04 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -79,21 +79,6 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } -// ResolveVariableReferencesOnlyResourcesExcluding resolves variable references in -// resources while leaving references to the specified paths unresolved. -// Used by ProcessStaticResources for immutable bundles so that ${workspace.snapshot_path} -// is not resolved during Initialize; it is resolved in the Deploy phase after -// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. -func ResolveVariableReferencesOnlyResourcesExcluding(excludePaths ...string) bundle.Mutator { - return &resolveVariableReferences{ - prefixes: defaultPrefixes, - lookupFn: lookup, - extraRounds: maxResolutionRounds - 1, - pattern: dyn.NewPattern(dyn.Key("resources")), - includeResources: true, - excludePaths: excludePaths, - } -} func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index f682419f32a..876980e9486 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -6,9 +6,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" - "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" ) @@ -65,46 +63,3 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCase.assert(t, b) } } - -// TestResolveVariableReferencesExcludePaths verifies that paths listed in excludePaths -// are skipped during resolution and left as unresolved variable references. -// This is used by ProcessStaticResources for immutable bundles so that -// ${workspace.file_path} and ${workspace.artifact_path} can be resolved later -// (in the Build phase, after artifacts are built and the correct snapshot path is known). -func TestResolveVariableReferencesExcludePaths(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - FilePath: "/snapshot/path/src/files", - ArtifactPath: "/snapshot/path/src/artifacts", - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: jobs.JobSettings{ - Tasks: []jobs.Task{ - { - SparkPythonTask: &jobs.SparkPythonTask{ - PythonFile: "${workspace.file_path}/main.py", - }, - }, - }, - }, - }, - }, - }, - }, - } - - // With exclusion: ${workspace.file_path} should remain unresolved. - diags := bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResourcesExcluding("workspace.file_path", "workspace.artifact_path")) - require.NoError(t, diags.Error()) - assert.Equal(t, "${workspace.file_path}/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, - "reference should remain unresolved when path is excluded") - - // Without exclusion: ${workspace.file_path} should resolve normally. 
- diags = bundle.Apply(t.Context(), b, ResolveVariableReferencesOnlyResources()) - require.NoError(t, diags.Error()) - assert.Equal(t, "/snapshot/path/src/files/main.py", b.Config.Resources.Jobs["job1"].Tasks[0].SparkPythonTask.PythonFile, - "reference should resolve after exclusion is lifted") -} diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 8b8dcbbe3b8..5fcd54de33f 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -39,17 +39,7 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // - variable can be used a prefix // - path can be part of a complex variable value - // For immutable bundles, defer resolving ${workspace.snapshot_path} in resources. - // The actual snapshot path is only known after snapshot.Upload() returns the - // API-assigned path in the deploy phase. - var resourceResolver bundle.Mutator - if b.Config.Bundle.Deployment.ImmutableFolder { - resourceResolver = mutator.ResolveVariableReferencesOnlyResourcesExcluding( - "workspace.snapshot_path", - ) - } else { - resourceResolver = mutator.ResolveVariableReferencesOnlyResources() - } + resourceResolver := mutator.ResolveVariableReferencesOnlyResources() bundle.ApplySeqContext( ctx, diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index ebfa964d8bf..209bbcb06a0 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -127,19 +127,6 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } -// resourceVarResolver returns a mutator that resolves variable references in -// resources. 
For immutable bundles, ${workspace.file_path} and -// ${workspace.artifact_path} are excluded: the API assigns the snapshot path -// after upload, so they must remain as-is until snapshot.Upload() has run. -func resourceVarResolver(b *bundle.Bundle) bundle.Mutator { - if b.Config.Bundle.Deployment.ImmutableFolder { - return mutator.ResolveVariableReferencesOnlyResourcesExcluding( - "workspace.file_path", "workspace.artifact_path", - ) - } - return mutator.ResolveVariableReferencesOnlyResources() -} - // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. @@ -152,10 +139,8 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) - // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes. - // For immutable bundles, ${workspace.file_path} and ${workspace.artifact_path} are left - // unresolved: the actual snapshot path is assigned by the API after upload, not pre-computed. 
- resourceVarResolver(b), + // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes + mutator.ResolveVariableReferencesOnlyResources(), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 44292a8dd6f..7b58d16b161 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -319,18 +319,12 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V return dyn.NewValue(out, v.Locations()), nil } -// snapshotFilesRoot is the remote root used for file/notebook path translation -// in immutable bundles. References to this placeholder are resolved after -// snapshot.Upload() sets workspace.snapshot_path to the API-assigned path. -const snapshotFilesRoot = "${workspace.snapshot_path}/src/files" - func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { switch { case b.Config.Bundle.Deployment.ImmutableFolder: - // Use a placeholder root that is resolved after snapshot.Upload() sets - // workspace.snapshot_path. This defers path computation until the actual - // content-addressed path is known. - t.remoteRoot = snapshotFilesRoot + // Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths() + // replaces this local prefix with the actual snapshot path after upload. 
+ t.remoteRoot = t.b.SyncRootPath case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath default: diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 284bd0afe02..b07545ac6c2 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -82,9 +82,9 @@ type Workspace struct { StatePath string `json:"state_path,omitempty"` // SnapshotPath is the workspace path of the immutable snapshot uploaded during - // deployment. It is set by snapshot.Upload() and used to resolve - // ${workspace.snapshot_path} references in resource configurations. - // Only populated for bundles with bundle.immutable = true. + // deployment. It is set by snapshot.Upload() and persisted to local state so + // that snapshot.LoadState() can restore workspace.file_path for destroy. + // Only populated for bundles with deployment.immutable_folder = true. SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } diff --git a/bundle/deploy/metadata/load.go b/bundle/deploy/metadata/load.go deleted file mode 100644 index 3fe4eb1c3bd..00000000000 --- a/bundle/deploy/metadata/load.go +++ /dev/null @@ -1,57 +0,0 @@ -package metadata - -import ( - "context" - "encoding/json" - "io" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/metadata" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/filer" -) - -type load struct{} - -// Load reads the metadata file written during the last deploy and populates -// fields on the bundle that are not available locally (e.g. workspace.snapshot_path -// for immutable bundles, which is only known after snapshot.Upload() ran). 
-func Load() bundle.Mutator { - return &load{} -} - -func (m *load) Name() string { - return "metadata.Load" -} - -func (m *load) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(ctx), b.Config.Workspace.StatePath) - if err != nil { - return diag.FromErr(err) - } - - r, err := f.Read(ctx, metadataFileName) - if err != nil { - // Missing metadata file means the bundle was never deployed or was - // deployed by an older CLI version that didn't write metadata. Treat - // it as a no-op so destroy can still proceed. - return nil - } - defer r.Close() - - raw, err := io.ReadAll(r) - if err != nil { - return diag.FromErr(err) - } - - var md metadata.Metadata - if err := json.Unmarshal(raw, &md); err != nil { - return diag.FromErr(err) - } - - if md.Config.Workspace.SnapshotPath != "" { - b.Config.Workspace.SnapshotPath = md.Config.Workspace.SnapshotPath - } - - return nil -} diff --git a/libs/filer/snapshot_client.go b/bundle/deploy/snapshot/client.go similarity index 91% rename from libs/filer/snapshot_client.go rename to bundle/deploy/snapshot/client.go index a7d84891cb0..3a720cb2150 100644 --- a/libs/filer/snapshot_client.go +++ b/bundle/deploy/snapshot/client.go @@ -1,4 +1,4 @@ -package filer +package snapshot import ( "bytes" @@ -10,7 +10,7 @@ import ( "net/textproto" "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/client" + databricksclient "github.com/databricks/databricks-sdk-go/client" ) // SnapshotInfo holds the result of a successful snapshot upload. @@ -30,7 +30,7 @@ type SnapshotUploader interface { // snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. type snapshotAPIClient struct { - apiClient apiClient + client *databricksclient.DatabricksClient } // snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. 
@@ -42,11 +42,11 @@ type snapshotUploadResponse struct { // NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { - apiClient, err := client.New(w.Config) + c, err := databricksclient.New(w.Config) if err != nil { return nil, err } - return &snapshotAPIClient{apiClient: apiClient}, nil + return &snapshotAPIClient{client: c}, nil } // Upload uploads zipContent as an immutable snapshot identified by snapshotID. @@ -94,7 +94,7 @@ func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, cu } var resp snapshotUploadResponse - err = c.apiClient.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + err = c.client.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) if err != nil { return nil, fmt.Errorf("snapshot upload: %w", err) } diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go index 82c5d543680..680461d2658 100644 --- a/bundle/deploy/snapshot/path.go +++ b/bundle/deploy/snapshot/path.go @@ -15,8 +15,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/fileset" - "github.com/databricks/cli/libs/git" - "github.com/databricks/cli/libs/set" + libsync "github.com/databricks/cli/libs/sync" ) // zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed @@ -66,65 +65,18 @@ func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { return IDFromContent(content), nil } -// syncFiles returns the list of files to include in the snapshot zip using the -// same git-aware include/exclude logic as files.Upload (libs/sync). -func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { - // Use git.NewFileSet so that .gitignore rules are respected, matching the - // behaviour of the normal files.Upload sync path. 
- // Avoid passing an empty/nil paths slice: git.NewFileSet forwards it to - // fileset.New whose variadic default ("." if no args) is bypassed when the - // caller explicitly passes a nil slice. The SyncDefaultPath mutator always - // sets Sync.Paths to ["."] in the normal pipeline; we replicate that here - // so BundleZip works even when the bundle hasn't gone through the full pipeline. - var gitFS *git.FileSet - var err error - if len(b.Config.Sync.Paths) > 0 { - gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot, b.Config.Sync.Paths) - } else { - gitFS, err = git.NewFileSet(ctx, b.WorktreeRoot, b.SyncRoot) - } - if err != nil { - return nil, fmt.Errorf("build file set: %w", err) - } - - all := set.NewSetF(func(f fileset.File) string { - return f.Relative +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { + files, err := libsync.GetFileList(ctx, libsync.SyncOptions{ + WorktreeRoot: b.WorktreeRoot, + LocalRoot: b.SyncRoot, + Paths: b.Config.Sync.Paths, + Include: b.Config.Sync.Include, + Exclude: b.Config.Sync.Exclude, }) - - gitFiles, err := gitFS.Files() if err != nil { - return nil, fmt.Errorf("list sync files: %w", err) - } - all.Add(gitFiles...) - - if len(b.Config.Sync.Include) > 0 { - includeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Include) - if err != nil { - return nil, fmt.Errorf("build include set: %w", err) - } - include, err := includeFS.Files() - if err != nil { - return nil, fmt.Errorf("list include files: %w", err) - } - all.Add(include...) 
- } - - if len(b.Config.Sync.Exclude) > 0 { - excludeFS, err := fileset.NewGlobSet(b.SyncRoot, b.Config.Sync.Exclude) - if err != nil { - return nil, fmt.Errorf("build exclude set: %w", err) - } - exclude, err := excludeFS.Files() - if err != nil { - return nil, fmt.Errorf("list exclude files: %w", err) - } - for _, f := range exclude { - all.Remove(f) - } + return err } - - files := all.Iter() - // Sort for a stable zip (same content → same hash regardless of map iteration order). + // Sort for a stable zip (same content → same hash regardless of iteration order). slices.SortFunc(files, func(a, b fileset.File) int { if a.Relative < b.Relative { return -1 @@ -134,14 +86,6 @@ func syncFiles(ctx context.Context, b *bundle.Bundle) ([]fileset.File, error) { } return 0 }) - return files, nil -} - -func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { - files, err := syncFiles(ctx, b) - if err != nil { - return err - } for _, f := range files { rc, err := b.SyncRoot.Open(f.Relative) diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go index 9728fadb507..9b87182eab1 100644 --- a/bundle/deploy/snapshot/path_test.go +++ b/bundle/deploy/snapshot/path_test.go @@ -125,6 +125,6 @@ func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { require.NoError(t, err) names := zipEntryNames(t, zipContent) - assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should have extension stripped") + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should keep its extension") assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") } diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go new file mode 100644 index 00000000000..e1f47f55665 --- /dev/null +++ b/bundle/deploy/snapshot/state.go @@ -0,0 +1,63 @@ +package snapshot + +import ( + "context" + "os" + "path" + "path/filepath" + 
"strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +const snapshotPathStateFile = "snapshot_path" + +type saveState struct{} +type loadState struct{} + +// SaveState writes the snapshot path to the local deployment state directory +// so it can be recovered during destroy without reading metadata.json. +func SaveState() bundle.Mutator { + return &saveState{} +} + +// LoadState reads the snapshot path from the local deployment state directory +// and sets workspace.snapshot_path. Missing state is treated as a no-op so +// destroy can proceed against bundles deployed before this feature was added. +func LoadState() bundle.Mutator { + return &loadState{} +} + +func (s *saveState) Name() string { return "snapshot.SaveState" } +func (s *loadState) Name() string { return "snapshot.LoadState" } + +func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if b.Config.Workspace.SnapshotPath == "" { + return nil + } + dir, err := b.LocalStateDir(ctx) + if err != nil { + return diag.FromErr(err) + } + p := filepath.Join(dir, snapshotPathStateFile) + return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600)) +} + +func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + dir := b.GetLocalStateDir(ctx) + data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile)) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return diag.FromErr(err) + } + snapshotPath := strings.TrimSpace(string(data)) + b.Config.Workspace.SnapshotPath = snapshotPath + // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can + // rewrite local absolute paths to snapshot paths during destroy. 
+ b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") + b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts") + return nil +} diff --git a/bundle/deploy/snapshot/translate_paths.go b/bundle/deploy/snapshot/translate_paths.go new file mode 100644 index 00000000000..1576197e7f4 --- /dev/null +++ b/bundle/deploy/snapshot/translate_paths.go @@ -0,0 +1,50 @@ +package snapshot + +import ( + "context" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type translateResourcePaths struct{} + +// TranslateResourcePaths replaces local absolute paths in resource configs with the +// remote snapshot path. It must run after snapshot.Upload() has set +// b.Config.Workspace.FilePath to the content-addressed snapshot location. +// +// translate_paths.go uses b.SyncRootPath as the remote root for immutable bundles, +// so resource paths are stored as local absolute paths until this mutator rewrites them. +func TranslateResourcePaths() bundle.Mutator { + return &translateResourcePaths{} +} + +func (m *translateResourcePaths) Name() string { return "snapshot.TranslateResourcePaths" } + +func (m *translateResourcePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { + localPrefix := b.SyncRootPath + "/" + remotePrefix := b.Config.Workspace.FilePath + "/" + + err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { + return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + if len(p) == 0 { + return v, nil + } + // Only rewrite paths inside the resources section. 
+ if p[0] != dyn.Key("resources") { + return v, dyn.ErrSkip + } + str, ok := v.AsString() + if !ok { + return v, nil + } + if !strings.HasPrefix(str, localPrefix) { + return v, nil + } + return dyn.NewValue(remotePrefix+strings.TrimPrefix(str, localPrefix), v.Locations()), nil + }) + }) + return diag.FromErr(err) +} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go index 4eea5757cdd..86e8160ebfd 100644 --- a/bundle/deploy/snapshot/upload.go +++ b/bundle/deploy/snapshot/upload.go @@ -8,13 +8,12 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/log" ) type snapshotUpload struct { // uploader allows test injection of a custom SnapshotUploader. - uploader filer.SnapshotUploader + uploader SnapshotUploader } // Upload returns a mutator that builds the bundle zip, uploads it via @@ -32,7 +31,7 @@ func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn uploader := m.uploader if uploader == nil { var err error - uploader, err = filer.NewSnapshotUploader(b.WorkspaceClient(ctx)) + uploader, err = NewSnapshotUploader(b.WorkspaceClient(ctx)) if err != nil { return diag.FromErr(err) } diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 8411e376ebb..c60db5235bc 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,9 +14,11 @@ import ( "github.com/databricks/cli/libs/logdiag" ) +// LibLocationMap maps artifact names to library locations that need uploading. +// Computed by Build and consumed by Deploy to upload the right files. type LibLocationMap map[string][]libraries.LocationToUpdate -// The build phase builds artifacts. +// Build runs the build phase, which builds artifacts. 
func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { log.Info(ctx, "Phase: build") @@ -42,6 +44,10 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) + if logdiag.HasError(ctx) { + return nil + } + // For immutable bundles, library remote paths are set in the deploy phase // after snapshot.Upload() provides the content-addressed workspace.artifact_path. if b.Config.Bundle.Deployment.ImmutableFolder { @@ -49,15 +55,9 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { } libs, diags := libraries.ReplaceWithRemotePath(ctx, b) - for _, diag := range diags { - logdiag.LogDiag(ctx, diag) + for _, d := range diags { + logdiag.LogDiag(ctx, d) } - - bundle.ApplyContext(ctx, b, - // TransformWheelTask must be run after ReplaceWithRemotePath so we can use correct remote path in the - // transformed notebook - trampoline.TransformWheelTask(), - ) - + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 5c9b7979ae5..e8c22d56ffa 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,8 +8,7 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/cli/bundle/deploy" +"github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" @@ -22,7 +21,7 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" - "github.com/databricks/cli/libs/agent" +"github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" @@ -145,13 +144,13 
@@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand if b.Config.Bundle.Deployment.ImmutableFolder { // Upload all source files and built artifacts as a single immutable snapshot. - // The API assigns a content-addressed path, so workspace.snapshot_path (and - // derived workspace.file_path / workspace.artifact_path) are only known after - // upload. Resolve variable references in resources and set library remote paths - // once the actual paths are available. + // The API assigns a content-addressed workspace.file_path; snapshot.TranslateResourcePaths() + // then replaces the local absolute paths (written by translate_paths during validate) + // with the actual snapshot remote paths. bundle.ApplySeqContext(ctx, b, snapshot.Upload(), - mutator.ResolveVariableReferencesOnlyResources(), + snapshot.TranslateResourcePaths(), + snapshot.SaveState(), ) if !logdiag.HasError(ctx) { _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 6ab84ad7787..3dccf770275 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,7 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" - deploymetadata "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -140,11 +140,11 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { } if b.Config.Bundle.Deployment.ImmutableFolder { - // For immutable bundles, resource paths contain ${workspace.snapshot_path} - // which was set during deploy by snapshot.Upload(). Load it from the stored - // metadata so it can be resolved before Terraform processes the config. 
- mutators = append([]bundle.Mutator{deploymetadata.Load()}, mutators...) - mutators = append(mutators, mutator.ResolveVariableReferencesOnlyResources()) + // For immutable bundles, resource paths are local absolute paths after + // translate_paths. Restore workspace.file_path from the local state file + // and replace the local prefix with the snapshot remote path before + // Terraform processes the config. + mutators = append([]bundle.Mutator{snapshot.LoadState(), snapshot.TranslateResourcePaths()}, mutators...) } mutators = append(mutators, diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index d61c4525530..683477c465f 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -297,7 +297,6 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } var libs phases.LibLocationMap - if opts.Build { t2 := time.Now() libs = phases.Build(ctx, b) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index c65b49eb775..c7a1428c764 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -245,6 +245,35 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { return all.Iter(), nil } +// GetFileList returns the list of files that would be synced given opts, +// applying the same git-aware include/exclude logic as RunOnce. +// Unlike New, it does not verify the remote path or load a sync snapshot. 
+func GetFileList(ctx context.Context, opts SyncOptions) ([]fileset.File, error) { + paths := opts.Paths + if len(paths) == 0 { + paths = []string{"."} + } + fileSet, err := git.NewFileSet(ctx, opts.WorktreeRoot, opts.LocalRoot, paths) + if err != nil { + return nil, fmt.Errorf("build file set: %w", err) + } + includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include) + if err != nil { + return nil, err + } + excludeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Exclude) + if err != nil { + return nil, err + } + s := &Sync{ + SyncOptions: &opts, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + } + return s.GetFileList(ctx) +} + func (s *Sync) RunContinuous(ctx context.Context) error { ticker := time.NewTicker(s.PollInterval) defer ticker.Stop() diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index b1ec9b2e3d8..c98d3b826ec 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -1,9 +1,13 @@ package testserver import ( + "bytes" "encoding/base64" "encoding/json" "fmt" + "io" + "mime" + "mime/multipart" "net/http" "path" "strings" @@ -537,6 +541,45 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.ReposDelete(req) }) + server.Handle("POST", "/api/2.0/repos/snapshots", func(req Request) any { + contentType := req.Headers.Get("Content-Type") + mediaType, params, err := mime.ParseMediaType(contentType) + if err != nil || !strings.HasPrefix(mediaType, "multipart/") { + return Response{StatusCode: http.StatusBadRequest} + } + + mr := multipart.NewReader(bytes.NewReader(req.Body), params["boundary"]) + var bundleID, snapshotID string + for { + p, err := mr.NextPart() + if err == io.EOF { + break + } + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + data, err := io.ReadAll(p) + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + switch p.FormName() { + case "bundle_id": + bundleID = 
string(data) + case "snapshot_id": + snapshotID = string(data) + } + } + + // The real API uses the workspace user UUID (not email) in the snapshot path, + // matching service-principal identities used in cloud acceptance tests. + snapshotPath := fmt.Sprintf("/Workspace/Users/%s/.snapshots/%s/%s", TestUserSP.UserName, bundleID, snapshotID) + return map[string]any{ + "snapshot": map[string]any{ + "path": snapshotPath, + }, + } + }) + // SQL Warehouses: server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any { From be6dec3a6fa107d503bd30b77649e750e8a7aab8 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:49:26 +0200 Subject: [PATCH 12/15] fix fmt --- bundle/config/bundle.go | 1 - bundle/config/mutator/resolve_variable_references.go | 1 - bundle/deploy/snapshot/state.go | 6 ++++-- bundle/phases/deploy.go | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 473e355225f..ce6d25bfe62 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -59,5 +59,4 @@ type Bundle struct { // A stable generated UUID for the bundle. This is normally serialized by // Databricks first party template when a user runs bundle init. 
Uuid string `json:"uuid,omitempty"` - } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 4da02a31b04..fab5dc218f1 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -79,7 +79,6 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } - func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index e1f47f55665..681da1b94fc 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -13,8 +13,10 @@ import ( const snapshotPathStateFile = "snapshot_path" -type saveState struct{} -type loadState struct{} +type ( + saveState struct{} + loadState struct{} +) // SaveState writes the snapshot path to the local deployment state directory // so it can be recovered during destroy without reading metadata.json. 
diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 6202870f700..851cac81d9a 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,7 +8,7 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" -"github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" @@ -21,7 +21,7 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/statemgmt" -"github.com/databricks/cli/libs/agent" + "github.com/databricks/cli/libs/agent" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" From 0f1ed5201a4400130ecb0ea46d7f2669a6d8809a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:50:04 +0200 Subject: [PATCH 13/15] fix annotations --- bundle/internal/schema/annotations.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 234e099dd41..08a9be3086b 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -90,8 +90,8 @@ bundle: "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. "immutable_folder": - "description": |- - Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. 
+ "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. From d9785598008fc65cfb3a619fc715c326ba18eafe Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:56:19 +0200 Subject: [PATCH 14/15] fix lint --- bundle/deploy/snapshot/state.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go index 681da1b94fc..b53fa106638 100644 --- a/bundle/deploy/snapshot/state.go +++ b/bundle/deploy/snapshot/state.go @@ -2,6 +2,8 @@ package snapshot import ( "context" + "errors" + "io/fs" "os" "path" "path/filepath" @@ -38,10 +40,12 @@ func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic if b.Config.Workspace.SnapshotPath == "" { return nil } + dir, err := b.LocalStateDir(ctx) if err != nil { return diag.FromErr(err) } + p := filepath.Join(dir, snapshotPathStateFile) return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600)) } @@ -49,14 +53,18 @@ func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { dir := b.GetLocalStateDir(ctx) data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile)) - if os.IsNotExist(err) { + + if errors.Is(err, fs.ErrNotExist) { return nil } + if err != nil { return diag.FromErr(err) } + snapshotPath := strings.TrimSpace(string(data)) b.Config.Workspace.SnapshotPath = snapshotPath + // Restore FilePath and ArtifactPath so that TranslateResourcePaths() can // rewrite local absolute paths to snapshot paths during destroy. 
b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files") From 9a6c898cc7f63c1c9575c40b89643ac138e5a208 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 18 Jun 2026 18:59:36 +0200 Subject: [PATCH 15/15] do not call set permissions on immutable ws root --- bundle/permissions/workspace_root.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 9bb9065fe80..f33e223ed1b 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -28,6 +28,11 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + // If the bundle is immutable, we don't need to apply any permissions to the workspace root. + if b.Config.Bundle.Deployment.ImmutableFolder { + return nil + } + stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b) if err != nil { return diag.FromErr(err)