diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl new file mode 100644 index 00000000000..caacf79f907 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -0,0 +1,22 @@ +bundle: + name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME + deployment: + immutable_folder: true + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + + environments: + - environment_key: env + spec: + environment_version: "4" diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml new file mode 100644 index 00000000000..bbc7fcfd1bd --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt new file mode 100644 index 00000000000..52398f774a3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script new file mode 100644 index 00000000000..21541fb79ee --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -0,0 +1,14 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths point into the snapshot +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py new file mode 100644 index 
00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml new file mode 100644 index 00000000000..21acfa23bd6 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -0,0 +1,16 @@ +Local = true +Cloud = true + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so it doesn't need to be +# hardcoded in output.txt and the test stays stable across file changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..6dd62eee1fb --- /dev/null +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -0,0 +1,33 @@ +bundle: + name: test-bundle-immutable-$UNIQUE_NAME + deployment: + immutable_folder: true + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + - task_key: python_wheel_task + python_wheel_task: + package_name: immutable + entry_point: main + environment_key: env + environments: + - environment_key: env + spec: + environment_version: "4" + dependencies: + - ./dist/*.whl diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml new file mode 100644 index 00000000000..650836edeb3 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = false +Cloud = true 
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt new file mode 100644 index 00000000000..5fc2ed07493 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -0,0 +1,45 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +[ + "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" +] + +>>> [CLI] bundle run my_job +script: line 182: sort_lines: command not found +Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] + +[TIMESTAMP] "my job" RUNNING +[TIMESTAMP] "my job" TERMINATED SUCCESS + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
+ +Exit code: 127 diff --git a/acceptance/bundle/deploy/immutable/pyproject.toml b/acceptance/bundle/deploy/immutable/pyproject.toml new file mode 100644 index 00000000000..3e49b180137 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/pyproject.toml @@ -0,0 +1,31 @@ +[project] +name = "immutable" +version = "0.0.1" +authors = [{ name = "andrew.nester@databricks.com" }] +requires-python = ">=3.10,<3.13" +dependencies = [ + # Any dependencies for jobs and pipelines in this project can be added here + # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies + # + # LIMITATION: for pipelines, dependencies are cached during development; + # add dependencies to the 'environment' section of your pipeline.yml file instead +] + +[dependency-groups] +dev = [ + "pytest", + "ruff", + "databricks-dlt", + "databricks-connect>=15.4,<15.5", + "ipykernel", +] + +[project.scripts] +main = "immutable.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.ruff] +line-length = 120 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script new file mode 100644 index 00000000000..c783212308a --- /dev/null +++ b/acceptance/bundle/deploy/immutable/script @@ -0,0 +1,18 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths are immutable +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' + +# Sort output to make it stable +trace $CLI bundle run my_job | 
sort_lines diff --git a/acceptance/bundle/deploy/immutable/src/immutable/__init__.py b/acceptance/bundle/deploy/immutable/src/immutable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acceptance/bundle/deploy/immutable/src/immutable/main.py b/acceptance/bundle/deploy/immutable/src/immutable/main.py new file mode 100644 index 00000000000..9eccd00150d --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/immutable/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from Python Wheel Task!") + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/deploy/immutable/src/main.py b/acceptance/bundle/deploy/immutable/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable/src/notebook.py b/acceptance/bundle/deploy/immutable/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml new file mode 100644 index 00000000000..a97b714bea8 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -0,0 +1,16 @@ +Local = false +Cloud = true + +Ignore = [ + "dist", + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +[[Repls]] +# Replace snapshot hash with SNAPSHOT_HASH +Old = "[0-9a-f]{64}" +New = "[SNAPSHOT_HASH]" diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml new file mode 100644 index 00000000000..b6ce1bd97d1 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -0,0 +1,22 @@ +bundle: + name: my-bundle + deployment: + 
immutable_folder: true + +sync: + exclude: + # Test framework files that are not part of the bundle source. + - "repls.json" + - "user_repls.json" + - "script" + - "*.toml" + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: my_task + existing_cluster_id: "0101-120000-aaaaaaaa" + spark_python_task: + python_file: ./src/main.py diff --git a/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt new file mode 100644 index 00000000000..da1a2f05768 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate -o json +Warning: Pattern user_repls.json does not match any files + at sync.exclude[1] + in databricks.yml:10:7 + +{ + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts", + "current_user": { + "domain_friendly_name": "[USERNAME]", + "id": "[USERID]", + "short_name": "[USERNAME]", + "userName": "[USERNAME]" + }, + "file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files", + "resource_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state" + }, + "tasks": [ + { + "existing_cluster_id": "0101-120000-aaaaaaaa", + "spark_python_task": { + "python_file": "[TEST_TMP_DIR]/src/main.py" + }, + "task_key": "my_task" + } + ] +} diff --git a/acceptance/bundle/validate/immutable_workspace_paths/script 
b/acceptance/bundle/validate/immutable_workspace_paths/script new file mode 100644 index 00000000000..df056fa9b99 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}' diff --git a/acceptance/bundle/validate/immutable_workspace_paths/src/main.py b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py new file mode 100644 index 00000000000..11b15b1a458 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/validate/immutable_workspace_paths/test.toml b/acceptance/bundle/validate/immutable_workspace_paths/test.toml new file mode 100644 index 00000000000..85e02532c93 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +Ignore = [".databricks"] diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index b7efb4456f9..8be0596823e 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -5,6 +5,13 @@ type Deployment struct { // running jobs or pipelines in the workspace. Defaults to false. FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` + // ImmutableFolder specifies that bundle files and artifacts are uploaded as a + // single immutable snapshot rather than being synced individually. When true, + // the deployment calls /api/2.0/repos/snapshots with a zip containing all files + // and sets workspace.file_path and workspace.artifact_path to the returned + // content-addressed path. validate and plan make no mutative API calls. + ImmutableFolder bool `json:"immutable_folder,omitempty"` + // Lock configures locking behavior on deployment. 
Lock Lock `json:"lock,omitempty"` } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 113f0576394..fab5dc218f1 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -59,6 +59,11 @@ type resolveVariableReferences struct { includeResources bool artifactsReferenceUsed bool + + // excludePaths lists variable reference paths (e.g. "workspace.file_path") whose + // resolution should be skipped. References to these paths remain unresolved so a + // later mutator can set the value and re-run resolution. + excludePaths []string } func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { @@ -229,6 +234,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn // Perform resolution only if the path starts with one of the specified prefixes. if slices.ContainsFunc(prefixes, path.HasPrefix) { + if slices.Contains(m.excludePaths, path.String()) { + return dyn.InvalidValue, dynvar.ErrSkipResolution + } value, err := m.lookupFn(normalized, path, b) hasUpdates = hasUpdates || (err == nil && value.IsValid()) return value, err diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 7d3ad742e4b..5fcd54de33f 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -38,13 +38,16 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // we need to resolve variables because they can change path values: // - variable can be used a prefix // - path can be part of a complex variable value + + resourceResolver := mutator.ResolveVariableReferencesOnlyResources() + bundle.ApplySeqContext( ctx, b, // Reads (dynamic): * (strings) (searches for variable references in string 
values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + resourceResolver, mutator.NormalizePaths(), // Translate dashboard paths into paths in the workspace file system diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index b36ec094447..ab81029d6b4 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -320,11 +320,14 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V } func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { - // Set the remote root to the sync root if source-linked deployment is enabled. - // Otherwise, set it to the workspace file path. - if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + switch { + case b.Config.Bundle.Deployment.ImmutableFolder: + // Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths() + // replaces this local prefix with the actual snapshot path after upload. t.remoteRoot = t.b.SyncRootPath - } else { + case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): + t.remoteRoot = t.b.SyncRootPath + default: t.remoteRoot = t.b.Config.Workspace.FilePath } diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 1300a87a78c..1df602d2088 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -78,6 +78,12 @@ type Workspace struct { // Remote workspace path for deployment state. // This defaults to "${workspace.root}/state". StatePath string `json:"state_path,omitempty"` + + // SnapshotPath is the workspace path of the immutable snapshot uploaded during + // deployment. 
It is set by snapshot.Upload() and persisted to local state so + // that snapshot.LoadState() can restore workspace.file_path for destroy. + // Only populated for bundles with deployment.immutable_folder = true. + SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } type User struct { diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index cb7be9811c4..08a45d7a17c 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -105,6 +105,7 @@ func (m *compute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + b.Metadata.Config.Workspace.SnapshotPath = b.Config.Workspace.SnapshotPath // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath diff --git a/bundle/deploy/snapshot/client.go b/bundle/deploy/snapshot/client.go new file mode 100644 index 00000000000..3a720cb2150 --- /dev/null +++ b/bundle/deploy/snapshot/client.go @@ -0,0 +1,103 @@ +package snapshot + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "mime/multipart" + "net/http" + "net/textproto" + + "github.com/databricks/databricks-sdk-go" + databricksclient "github.com/databricks/databricks-sdk-go/client" +) + +// SnapshotInfo holds the result of a successful snapshot upload. +type SnapshotInfo struct { + // Path is the immutable workspace path for the uploaded snapshot content. + Path string +} + +// SnapshotUploader abstracts the /api/2.0/repos/snapshots endpoint. +// snapshotID is the content-addressed key supplied by the caller; the API uses +// it as the final path component so that identical content always resolves to +// the same workspace location. 
+// This interface exists so the implementation can later be replaced with a Go SDK call. +type SnapshotUploader interface { + Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) +} + +// snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. +type snapshotAPIClient struct { + client *databricksclient.DatabricksClient +} + +// snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. +type snapshotUploadResponse struct { + Snapshot struct { + Path string `json:"path"` + } `json:"snapshot"` +} + +// NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. +func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { + c, err := databricksclient.New(w.Config) + if err != nil { + return nil, err + } + return &snapshotAPIClient{client: c}, nil +} + +// Upload uploads zipContent as an immutable snapshot identified by snapshotID. +// snapshotID is the SHA-256 of the bundle zip (synced files plus built artifacts) and is used by the server as +// the content-addressed path component. currentUser is granted CAN_READ on the snapshot. +func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID, currentUser string, zipContent []byte) (*SnapshotInfo, error) { + var body bytes.Buffer + mw := multipart.NewWriter(&body) + + if err := mw.WriteField("snapshot_id", snapshotID); err != nil { + return nil, fmt.Errorf("failed to write snapshot_id: %w", err) + } + if err := mw.WriteField("bundle_id", bundleID); err != nil { + return nil, fmt.Errorf("failed to write bundle_id: %w", err) + } + + // The API requires an access_control_list granting the current user read access. 
+ acl, err := json.Marshal([]map[string]string{ + {"user_name": currentUser, "permission_level": "CAN_READ"}, + }) + if err != nil { + return nil, fmt.Errorf("failed to marshal access_control_list: %w", err) + } + if err := mw.WriteField("access_control_list", string(acl)); err != nil { + return nil, fmt.Errorf("failed to write access_control_list: %w", err) + } + + // Attach the zip with an explicit content-type so the server treats it as binary. + fh := make(textproto.MIMEHeader) + fh.Set("Content-Disposition", `form-data; name="file"; filename="snapshot.zip"`) + fh.Set("Content-Type", "application/zip") + part, err := mw.CreatePart(fh) + if err != nil { + return nil, fmt.Errorf("failed to create file part: %w", err) + } + if _, err := part.Write(zipContent); err != nil { + return nil, fmt.Errorf("failed to write zip content: %w", err) + } + if err := mw.Close(); err != nil { + return nil, fmt.Errorf("failed to finalize multipart body: %w", err) + } + + headers := map[string]string{ + "Content-Type": mw.FormDataContentType(), + } + + var resp snapshotUploadResponse + err = c.client.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + if err != nil { + return nil, fmt.Errorf("snapshot upload: %w", err) + } + + return &SnapshotInfo{Path: resp.Snapshot.Path}, nil +} diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go new file mode 100644 index 00000000000..680461d2658 --- /dev/null +++ b/bundle/deploy/snapshot/path.go @@ -0,0 +1,161 @@ +package snapshot + +import ( + "archive/zip" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/fileset" + libsync "github.com/databricks/cli/libs/sync" +) + +// zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed +// and reproducible: the same file content always produces the same hash regardless of 
when +// the zip was built or the file's mtime. +var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + +// BundleZip builds the zip that is uploaded to the snapshot API. +// It contains: +// - all files from the bundle sync root under the "files/" prefix, +// selected with the same git-aware + include/exclude logic as files.Upload +// - all built artifact files under the "artifacts/.internal/" prefix +// +// The snapshot ID is always IDFromContent(BundleZip(b)), ensuring the +// pre-calculated path and the uploaded path are derived from the same content. +func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + + if err := addSyncRootToZip(ctx, zw, b); err != nil { + return nil, err + } + if err := addArtifactsToZip(zw, b); err != nil { + return nil, err + } + + if err := zw.Close(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// IDFromContent returns the SHA-256 hex digest of content. +func IDFromContent(content []byte) string { + h := sha256.Sum256(content) + return hex.EncodeToString(h[:]) +} + +// SnapshotID builds the bundle zip and returns its SHA-256 hex digest. +// Called after artifacts are built so that ApplyImmutableWorkspacePaths and +// snapshot.Upload both hash identical content. +func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { + content, err := BundleZip(ctx, b) + if err != nil { + return "", err + } + return IDFromContent(content), nil +} + +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) error { + files, err := libsync.GetFileList(ctx, libsync.SyncOptions{ + WorktreeRoot: b.WorktreeRoot, + LocalRoot: b.SyncRoot, + Paths: b.Config.Sync.Paths, + Include: b.Config.Sync.Include, + Exclude: b.Config.Sync.Exclude, + }) + if err != nil { + return err + } + // Sort for a stable zip (same content → same hash regardless of iteration order). 
+ slices.SortFunc(files, func(a, b fileset.File) int { + if a.Relative < b.Relative { + return -1 + } + if a.Relative > b.Relative { + return 1 + } + return 0 + }) + + for _, f := range files { + rc, err := b.SyncRoot.Open(f.Relative) + if err != nil { + return fmt.Errorf("open %s: %w", f.Relative, err) + } + + entryPath := filepath.ToSlash(f.Relative) + h := &zip.FileHeader{ + Name: "files/" + entryPath, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + rc.Close() + return fmt.Errorf("zip entry for %s: %w", f.Relative, err) + } + _, err = io.Copy(w, rc) + rc.Close() + if err != nil { + return fmt.Errorf("write %s: %w", f.Relative, err) + } + } + return nil +} + +// addArtifactsToZip writes each built artifact file (the patched copy when one exists) +// under "artifacts/.internal". Artifacts are visited in sorted name order because Go map +// iteration is randomized and the zip bytes must be identical on every call for the +// snapshot ID to be stable. +func addArtifactsToZip(zw *zip.Writer, b *bundle.Bundle) error { + names := make([]string, 0, len(b.Config.Artifacts)) + for name := range b.Config.Artifacts { + names = append(names, name) + } + slices.Sort(names) + for _, name := range names { + for _, af := range b.Config.Artifacts[name].Files { + source := af.Source + if af.Patched != "" { + source = af.Patched + } + // ".internal" matches libraries.InternalDirName so that ReplaceWithRemotePath + // produces library paths that resolve correctly inside the snapshot. 
+ if err := addLocalFileToZip(zw, source, "artifacts/.internal"); err != nil { + return err + } + } + } + return nil +} + +func addLocalFileToZip(zw *zip.Writer, localPath, zipPrefix string) error { + f, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("open %s: %w", localPath, err) + } + defer f.Close() + + entryName := zipPrefix + "/" + filepath.Base(localPath) + h := &zip.FileHeader{ + Name: entryName, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + return fmt.Errorf("zip entry %s: %w", entryName, err) + } + _, err = io.Copy(w, f) + return err +} diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go new file mode 100644 index 00000000000..9b87182eab1 --- /dev/null +++ b/bundle/deploy/snapshot/path_test.go @@ -0,0 +1,130 @@ +package snapshot_test + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "slices" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func makeBundleWithFiles(t *testing.T, files map[string]string) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for name, content := range files { + p := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755)) + require.NoError(t, os.WriteFile(p, []byte(content), 0o644)) + } + root := vfs.MustNew(dir) + return &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + // WorktreeRoot = SyncRoot is the fallback used by LoadGitDetails when + // there is no git repository. 
+ WorktreeRoot: root, + Config: config.Root{}, + } +} + +func TestBundleZipIsDeterministic(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "src/task.py": "def run(): pass", + }) + + zip1, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, zip1, zip2, "BundleZip must produce identical bytes for identical content") +} + +func TestBundleZipChangesWithContent(t *testing.T) { + b1 := makeBundleWithFiles(t, map[string]string{"main.py": "v1"}) + b2 := makeBundleWithFiles(t, map[string]string{"main.py": "v2"}) + + zip1, err := snapshot.BundleZip(t.Context(), b1) + require.NoError(t, err) + zip2, err := snapshot.BundleZip(t.Context(), b2) + require.NoError(t, err) + + assert.NotEqual(t, zip1, zip2, "different file content must produce different zips") +} + +func TestBundleZipRespectsExcludes(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude.Config.Sync.Exclude = []string{"*.json"} + + zipAll, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zipExcl, err := snapshot.BundleZip(t.Context(), bExclude) + require.NoError(t, err) + + // The zip without the excluded file should be smaller and different. 
+ assert.NotEqual(t, zipAll, zipExcl) + assert.Less(t, len(zipExcl), len(zipAll)) +} + +func TestIDFromContent(t *testing.T) { + id := snapshot.IDFromContent([]byte("hello")) + // SHA-256 of "hello" + assert.Equal(t, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", id) + assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") +} + +func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + expectedID := snapshot.IDFromContent(zipContent) + + id, err := snapshot.SnapshotID(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, expectedID, id) +} + +func zipEntryNames(t *testing.T, zipContent []byte) []string { + t.Helper() + r, err := zip.NewReader(bytes.NewReader(zipContent), int64(len(zipContent))) + require.NoError(t, err) + names := make([]string, len(r.File)) + for i, f := range r.File { + names[i] = f.Name + } + return names +} + +func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { + // Minimal valid Jupyter notebook content. 
+ ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` + b := makeBundleWithFiles(t, map[string]string{ + "src/my_notebook.ipynb": ipynb, + "src/script.py": "print('hello')", + }) + + zipContent, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + names := zipEntryNames(t, zipContent) + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should keep its extension") + assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") +} diff --git a/bundle/deploy/snapshot/state.go b/bundle/deploy/snapshot/state.go new file mode 100644 index 00000000000..b53fa106638 --- /dev/null +++ b/bundle/deploy/snapshot/state.go @@ -0,0 +1,73 @@ +package snapshot + +import ( + "context" + "errors" + "io/fs" + "os" + "path" + "path/filepath" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +const snapshotPathStateFile = "snapshot_path" + +type ( + saveState struct{} + loadState struct{} +) + +// SaveState writes the snapshot path to the local deployment state directory +// so it can be recovered during destroy without reading metadata.json. +func SaveState() bundle.Mutator { + return &saveState{} +} + +// LoadState reads the snapshot path from the local deployment state directory +// and sets workspace.snapshot_path. Missing state is treated as a no-op so +// destroy can proceed against bundles deployed before this feature was added. 
+func LoadState() bundle.Mutator {
+	return &loadState{}
+}
+
+func (s *saveState) Name() string { return "snapshot.SaveState" }
+func (s *loadState) Name() string { return "snapshot.LoadState" }
+
+// Apply persists workspace.snapshot_path in the local state directory.
+// It does nothing when no snapshot path has been set.
+func (s *saveState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	if b.Config.Workspace.SnapshotPath == "" {
+		return nil
+	}
+	dir, err := b.LocalStateDir(ctx)
+	if err != nil {
+		return diag.FromErr(err)
+	}
+	p := filepath.Join(dir, snapshotPathStateFile)
+	return diag.FromErr(os.WriteFile(p, []byte(b.Config.Workspace.SnapshotPath), 0o600))
+}
+
+// Apply restores workspace.snapshot_path, file_path and artifact_path so that
+// TranslateResourcePaths() can rewrite local absolute paths during destroy.
+func (s *loadState) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	dir := b.GetLocalStateDir(ctx)
+	data, err := os.ReadFile(filepath.Join(dir, snapshotPathStateFile))
+	if errors.Is(err, fs.ErrNotExist) {
+		return nil
+	}
+	if err != nil {
+		return diag.FromErr(err)
+	}
+	snapshotPath := strings.TrimSpace(string(data))
+	if snapshotPath == "" {
+		// Blank state: path.Join would otherwise produce the relative "src/files".
+		return nil
+	}
+	b.Config.Workspace.SnapshotPath = snapshotPath
+	b.Config.Workspace.FilePath = path.Join(snapshotPath, "src", "files")
+	b.Config.Workspace.ArtifactPath = path.Join(snapshotPath, "src", "artifacts")
+	return nil
+}
diff --git a/bundle/deploy/snapshot/translate_paths.go b/bundle/deploy/snapshot/translate_paths.go
new file mode 100644
index 00000000000..1576197e7f4
--- /dev/null
+++ b/bundle/deploy/snapshot/translate_paths.go
@@ -0,0 +1,50 @@
+package snapshot
+
+import (
+	"context"
+	"strings"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+)
+
+type translateResourcePaths struct{}
+
+// TranslateResourcePaths replaces local absolute paths in resource configs with the
+// remote snapshot path. It must run after snapshot.Upload() has set
+// b.Config.Workspace.FilePath to the content-addressed snapshot location.
+//
+// translate_paths.go uses b.SyncRootPath as the remote root for immutable bundles,
+// so resource paths are stored as local absolute paths until this mutator rewrites them.
+func TranslateResourcePaths() bundle.Mutator {
+	return &translateResourcePaths{}
+}
+
+func (m *translateResourcePaths) Name() string { return "snapshot.TranslateResourcePaths" }
+
+// Apply rewrites strings under "resources" that start with the local sync root so they point
+// below workspace.file_path. With either location empty the prefix would be "/": do nothing.
+func (m *translateResourcePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
+	if b.SyncRootPath == "" || b.Config.Workspace.FilePath == "" {
+		return nil
+	}
+
+	localPrefix := strings.TrimSuffix(b.SyncRootPath, "/") + "/"
+	remotePrefix := strings.TrimSuffix(b.Config.Workspace.FilePath, "/") + "/"
+	err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) {
+		return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
+			// Skip everything outside resources; the root mapping itself is never a string.
+			if len(p) > 0 && p[0] != dyn.Key("resources") {
+				return v, dyn.ErrSkip
+			}
+			str, ok := v.AsString()
+			rel, found := strings.CutPrefix(str, localPrefix)
+			if !ok || !found {
+				return v, nil
+			}
+			return dyn.NewValue(remotePrefix+rel, v.Locations()), nil
+		})
+	})
+	return diag.FromErr(err)
+}
diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go
new file mode 100644
index 00000000000..86e8160ebfd
--- /dev/null
+++ b/bundle/deploy/snapshot/upload.go
@@ -0,0 +1,66 @@
+package snapshot
+
+import (
+	"context"
+	"fmt"
+	"path"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/log"
+)
+
+type snapshotUpload struct {
+	// uploader allows test injection of a custom SnapshotUploader.
+	uploader SnapshotUploader
+}
+
+// Upload returns a mutator that builds the bundle zip, uploads it via
+// /api/2.0/repos/snapshots, and updates workspace.file_path and
+// workspace.artifact_path to the content-addressed location returned by the API.
+func Upload() bundle.Mutator {
+	return &snapshotUpload{}
+}
+
+func (m *snapshotUpload) Name() string {
+	return "snapshot.Upload"
+}
+
+// Apply zips the bundle, uploads it under a content-derived snapshot ID and points
+// workspace.snapshot_path, file_path and (with artifacts) artifact_path at the result.
+func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	uploader := m.uploader
+	if uploader == nil {
+		var err error
+		uploader, err = NewSnapshotUploader(b.WorkspaceClient(ctx))
+		if err != nil {
+			return diag.FromErr(err)
+		}
+	}
+
+	cmdio.LogString(ctx, "Uploading immutable bundle snapshot...")
+	zipContent, err := BundleZip(ctx, b)
+	if err != nil {
+		return diag.FromErr(fmt.Errorf("failed to build snapshot zip: %w", err))
+	}
+	snapshotID := IDFromContent(zipContent)
+	log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent))
+	info, err := uploader.Upload(ctx, b.Config.Bundle.Name, snapshotID, b.Config.Workspace.CurrentUser.UserName, zipContent)
+	if err != nil {
+		return diag.FromErr(err)
+	}
+	if info.Path == "" {
+		return diag.FromErr(fmt.Errorf("snapshot upload returned an empty path"))
+	}
+	log.Infof(ctx, "Snapshot uploaded to %s", info.Path)
+	// The API unpacks the zip under a "src" subdirectory.
+	b.Config.Workspace.SnapshotPath = info.Path
+	b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files")
+	// Only set artifact_path when artifacts are present; with no artifacts the
+	// zip has no "src/artifacts" directory and a get-status on it would 404.
+	if len(b.Config.Artifacts) > 0 {
+		b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts")
+	}
+	return nil
+}
diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 3adb9e9e740..08a9be3086b 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -89,6 +89,9 @@ bundle: "fail_on_active_runs": "description": |- Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + "immutable_folder": + "description": |- + Whether to upload bundle files and artifacts as a single immutable snapshot.
When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. The validate and plan commands make no mutative API calls when this is enabled. "lock": "description": |- The deployment lock attributes. diff --git a/bundle/metadata/metadata.go b/bundle/metadata/metadata.go index 1c61cb093f0..e5549472696 100644 --- a/bundle/metadata/metadata.go +++ b/bundle/metadata/metadata.go @@ -15,6 +15,9 @@ type Bundle struct { type Workspace struct { FilePath string `json:"file_path"` + // SnapshotPath is the workspace path of the immutable snapshot uploaded + // during deployment. Only populated for bundles with bundle.deployment.immutable_folder = true. + SnapshotPath string `json:"snapshot_path,omitempty"` } type Resource struct { diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 9bb9065fe80..f33e223ed1b 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -28,6 +28,11 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + // If the bundle is immutable, we don't need to apply any permissions to the workspace root. + if b.Config.Bundle.Deployment.ImmutableFolder { + return nil + } + stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b) if err != nil { return diag.FromErr(err) diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 5a32435f8f1..c60db5235bc 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,9 +14,11 @@ import ( "github.com/databricks/cli/libs/logdiag" ) +// LibLocationMap maps artifact names to library locations that need uploading. +// Computed by Build and consumed by Deploy to upload the right files. type LibLocationMap map[string][]libraries.LocationToUpdate -// The build phase builds artifacts.
+// Build runs the build phase, which builds artifacts. func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { log.Info(ctx, "Phase: build") @@ -24,6 +26,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { scripts.Execute(config.ScriptPreBuild), artifacts.Build(), scripts.Execute(config.ScriptPostBuild), + mutator.ResolveVariableReferencesWithoutResources( "artifacts", ), @@ -41,16 +44,20 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) - libs, diags := libraries.ReplaceWithRemotePath(ctx, b) - for _, diag := range diags { - logdiag.LogDiag(ctx, diag) + if logdiag.HasError(ctx) { + return nil } - bundle.ApplyContext(ctx, b, - // TransformWheelTask must be run after ReplaceWithRemotePath so we can use correct remote path in the - // transformed notebook - trampoline.TransformWheelTask(), - ) + // For immutable bundles, library remote paths are set in the deploy phase + // after snapshot.Upload() provides the content-addressed workspace.artifact_path. 
+ if b.Config.Bundle.Deployment.ImmutableFolder { + return nil + } + libs, diags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range diags { + logdiag.LogDiag(ctx, d) + } + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index fd76151483c..851cac81d9a 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -147,13 +148,38 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - uploadLibraries(ctx, b, libs) + if b.Config.Bundle.Deployment.ImmutableFolder { + // Upload all source files and built artifacts as a single immutable snapshot. + // The API assigns a content-addressed workspace.file_path; snapshot.TranslateResourcePaths() + // then replaces the local absolute paths (written by translate_paths during validate) + // with the actual snapshot remote paths. 
+ bundle.ApplySeqContext(ctx, b, + snapshot.Upload(), + snapshot.TranslateResourcePaths(), + snapshot.SaveState(), + ) + if !logdiag.HasError(ctx) { + _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range libDiags { + logdiag.LogDiag(ctx, d) + } + } + } else { + uploadLibraries(ctx, b, libs) + } + if logdiag.HasError(ctx) { return } + if !b.Config.Bundle.Deployment.ImmutableFolder { + bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) + if logdiag.HasError(ctx) { + return + } + } + bundle.ApplySeqContext(ctx, b, - files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 74049f26f42..992c9fef445 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ -132,17 +133,29 @@ func Destroy(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) { }() if !engine.IsDirect() { - bundle.ApplySeqContext(ctx, b, + mutators := []bundle.Mutator{ // We need to resolve artifact variable (how we do it in build phase) // because some of the to-be-destroyed resource might use this variable. // Not resolving might lead to terraform "Reference to undeclared resource" error mutator.ResolveVariableReferencesWithoutResources("artifacts"), mutator.ResolveVariableReferencesOnlyResources("artifacts"), + } + + if b.Config.Bundle.Deployment.ImmutableFolder { + // For immutable bundles, resource paths are local absolute paths after + // translate_paths. 
Restore workspace.file_path from the local state file + // and replace the local prefix with the snapshot remote path before + // Terraform processes the config. + mutators = append([]bundle.Mutator{snapshot.LoadState(), snapshot.TranslateResourcePaths()}, mutators...) + } + mutators = append(mutators, terraform.Interpolate(), terraform.Write(), terraform.Plan(terraform.PlanGoal("destroy")), ) + + bundle.ApplySeqContext(ctx, b, mutators...) } if logdiag.HasError(ctx) { diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 1afe4321798..782597b3a5e 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2641,6 +2641,10 @@ "description": "Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted.", "$ref": "#/$defs/bool" }, + "immutable_folder": { + "description": "Whether to upload bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a zip and uploaded via the snapshot API, and workspace.file_path and workspace.artifact_path are set to the returned content-addressed path. 
The validate and plan commands make no mutative API calls when this is enabled.", + "$ref": "#/$defs/bool" + }, "lock": { "description": "The deployment lock attributes.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index d61c4525530..683477c465f 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -297,7 +297,6 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle } var libs phases.LibLocationMap - if opts.Build { t2 := time.Now() libs = phases.Build(ctx, b) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 6d7708c8b37..cee1057e82c 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -245,6 +245,35 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { return all.Iter(), nil } +// GetFileList returns the list of files that would be synced given opts, +// applying the same git-aware include/exclude logic as RunOnce. +// Unlike New, it does not verify the remote path or load a sync snapshot. 
+func GetFileList(ctx context.Context, opts SyncOptions) ([]fileset.File, error) {
+	paths := opts.Paths
+	if len(paths) == 0 {
+		paths = []string{"."}
+	}
+	fileSet, err := git.NewFileSet(ctx, opts.WorktreeRoot, opts.LocalRoot, paths)
+	if err != nil {
+		return nil, fmt.Errorf("build file set: %w", err)
+	}
+	includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include)
+	if err != nil {
+		return nil, err
+	}
+	excludeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Exclude)
+	if err != nil {
+		return nil, err
+	}
+	s := &Sync{
+		SyncOptions:    &opts,
+		fileSet:        fileSet,
+		includeFileSet: includeFileSet,
+		excludeFileSet: excludeFileSet,
+	}
+	return s.GetFileList(ctx)
+}
+
 func (s *Sync) RunContinuous(ctx context.Context) error {
 	ticker := time.NewTicker(s.PollInterval)
 	defer ticker.Stop()
diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go
index 8f611a7c7c9..6af36e46219 100644
--- a/libs/testserver/handlers.go
+++ b/libs/testserver/handlers.go
@@ -1,9 +1,13 @@
 package testserver
 
 import (
+	"bytes"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"io"
+	"mime"
+	"mime/multipart"
 	"net/http"
 	"path"
 	"strings"
@@ -551,6 +555,50 @@ func AddDefaultHandlers(server *Server) {
 		return req.Workspace.ReposDelete(req)
 	})
 
+	server.Handle("POST", "/api/2.0/repos/snapshots", func(req Request) any {
+		contentType := req.Headers.Get("Content-Type")
+		mediaType, params, err := mime.ParseMediaType(contentType)
+		if err != nil || !strings.HasPrefix(mediaType, "multipart/") {
+			return Response{StatusCode: http.StatusBadRequest}
+		}
+
+		// Malformed or truncated multipart bodies are client errors, hence 400 rather than 500.
+		mr := multipart.NewReader(bytes.NewReader(req.Body), params["boundary"])
+		var bundleID, snapshotID string
+		for {
+			p, err := mr.NextPart()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return Response{StatusCode: http.StatusBadRequest}
+			}
+			data, err := io.ReadAll(p)
+			if err != nil {
+				return Response{StatusCode: http.StatusBadRequest}
+			}
+			switch p.FormName() {
+			case "bundle_id":
+				bundleID = string(data)
+			case "snapshot_id":
+				snapshotID = string(data)
+			}
+		}
+		if bundleID == "" || snapshotID == "" {
+			// Both form fields are needed to build the snapshot path below.
+			return Response{StatusCode: http.StatusBadRequest}
+		}
+
+		// The real API uses the workspace user UUID (not email) in the snapshot path,
+		// matching service-principal identities used in cloud acceptance tests.
+		snapshotPath := fmt.Sprintf("/Workspace/Users/%s/.snapshots/%s/%s", TestUserSP.UserName, bundleID, snapshotID)
+		return map[string]any{
+			"snapshot": map[string]any{
+				"path": snapshotPath,
+			},
+		}
+	})
+
 	// SQL Warehouses:
 
 	server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any {