Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Bundle used by the immutable-no-artifacts acceptance test: a job with a
# Spark Python task and a notebook task, deployed with immutable_folder enabled
# and without any build artifacts.
bundle:
  name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME
  deployment:
    immutable_folder: true

resources:
  jobs:
    my_job:
      name: my job
      tasks:
        - task_key: spark_python_task
          spark_python_task:
            python_file: ./src/main.py
          environment_key: env
        - task_key: notebook_task
          notebook_task:
            notebook_path: ./src/notebook.py

      environments:
        - environment_key: env
          spec:
            environment_version: "4"
3 changes: 3 additions & 0 deletions acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions acceptance/bundle/deploy/immutable-no-artifacts/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

>>> [CLI] bundle validate
Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME]
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default

Validation OK!

>>> [CLI] bundle deploy
Uploading immutable bundle snapshot...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] jobs get [NUMID]
"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py"

>>> [CLI] jobs get [NUMID]
"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook"

>>> [CLI] bundle destroy --auto-approve

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we assert that destroy deletes the snapshot? Even when .databricks is removed?

The following resources will be deleted:
delete resources.jobs.my_job

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
14 changes: 14 additions & 0 deletions acceptance/bundle/deploy/immutable-no-artifacts/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Acceptance script: deploy a bundle with immutable_folder enabled and print the
# task paths of the deployed job so they can be compared with output.txt.

# Render the bundle configuration from its template using the current environment.
envsubst < databricks.yml.tmpl > databricks.yml
# Destroy the deployed bundle; registered below to run whenever the script exits.
cleanup() {
trace $CLI bundle destroy --auto-approve
}
trap cleanup EXIT

trace $CLI bundle validate
trace $CLI bundle deploy


# Get a job and check that task paths point into the snapshot
# The job ID is read from the bundle summary; -r makes jq print it without quotes.
JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id')
# Print only the python_file of the task that has a spark_python_task.
trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file'
# Print only the notebook_path of the task that has a notebook_task.
trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path'
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("Hello from Spark Python Task!")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Databricks notebook source

print("Hello from Notebook Task!")
16 changes: 16 additions & 0 deletions acceptance/bundle/deploy/immutable-no-artifacts/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Local = true
Cloud = true

Ignore = [
"databricks.yml",
".databricks",
".venv",
"script",
"*.pyc",
]

# Normalize the content-addressed snapshot hash so it doesn't need to be
# hardcoded in output.txt and the test stays stable across file changes.
[[Repls]]
Old = '[0-9a-f]{64}'
New = '[SNAPSHOT_HASH]'
33 changes: 33 additions & 0 deletions acceptance/bundle/deploy/immutable/databricks.yml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Bundle used by the immutable acceptance test: a job with a Spark Python task,
# a notebook task and a Python wheel task, deployed with immutable_folder
# enabled. The wheel is built locally and referenced as an environment dependency.
bundle:
  name: test-bundle-immutable-$UNIQUE_NAME
  deployment:
    immutable_folder: true

artifacts:
  python_artifact:
    type: whl
    build: uv build --wheel

resources:
  jobs:
    my_job:
      name: my job
      tasks:
        - task_key: spark_python_task
          spark_python_task:
            python_file: ./src/main.py
          environment_key: env
        - task_key: notebook_task
          notebook_task:
            notebook_path: ./src/notebook.py
        - task_key: python_wheel_task
          python_wheel_task:
            package_name: immutable
            entry_point: main
          environment_key: env
      environments:
        - environment_key: env
          spec:
            environment_version: "4"
            dependencies:
              - ./dist/*.whl
3 changes: 3 additions & 0 deletions acceptance/bundle/deploy/immutable/out.test.toml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions acceptance/bundle/deploy/immutable/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

>>> [CLI] bundle validate
Name: test-bundle-immutable-[UNIQUE_NAME]
Target: default
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default

Validation OK!

>>> [CLI] bundle deploy
Building python_artifact...
Uploading immutable bundle snapshot...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] jobs get [NUMID]
"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py"

>>> [CLI] jobs get [NUMID]
"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook"

>>> [CLI] jobs get [NUMID]
[
"/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl"
]

>>> [CLI] bundle run my_job
script: line 182: sort_lines: command not found
Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID]

[TIMESTAMP] "my job" RUNNING
[TIMESTAMP] "my job" TERMINATED SUCCESS

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.jobs.my_job

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!

Exit code: 127
31 changes: 31 additions & 0 deletions acceptance/bundle/deploy/immutable/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[project]
name = "immutable"
version = "0.0.1"
authors = [{ name = "andrew.nester@databricks.com" }]
requires-python = ">=3.10,<3.13"
dependencies = [
# Any dependencies for jobs and pipelines in this project can be added here
# See also https://docs.databricks.com/dev-tools/bundles/library-dependencies
#
# LIMITATION: for pipelines, dependencies are cached during development;
# add dependencies to the 'environment' section of your pipeline.yml file instead
]

[dependency-groups]
dev = [
"pytest",
"ruff",
"databricks-dlt",
"databricks-connect>=15.4,<15.5",
"ipykernel",
]

[project.scripts]
main = "immutable.main:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.ruff]
line-length = 120
18 changes: 18 additions & 0 deletions acceptance/bundle/deploy/immutable/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Acceptance script: deploy a bundle with immutable_folder enabled, print the
# paths recorded on the deployed job, then run the job.

# Render the bundle configuration from its template using the current environment.
envsubst < databricks.yml.tmpl > databricks.yml

# Destroy the deployed bundle whenever the script exits, including on failure.
cleanup() {
    trace $CLI bundle destroy --auto-approve
}
trap cleanup EXIT

trace $CLI bundle validate
trace $CLI bundle deploy


# Get a job and check that task paths are immutable
JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id')
trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file'
trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path'
trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies'

# Sort output to make it stable.
# Uses the standard `sort` utility: the `sort_lines` command used previously is
# not available and made the script fail with exit code 127. LC_ALL=C keeps the
# ordering independent of the machine's locale.
trace $CLI bundle run my_job | LC_ALL=C sort
Empty file.
6 changes: 6 additions & 0 deletions acceptance/bundle/deploy/immutable/src/immutable/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def main():
    """Entry point of the wheel: print a greeting to standard output."""
    print("Hello from Python Wheel Task!")


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions acceptance/bundle/deploy/immutable/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("Hello from Spark Python Task!")
3 changes: 3 additions & 0 deletions acceptance/bundle/deploy/immutable/src/notebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Databricks notebook source

print("Hello from Notebook Task!")
16 changes: 16 additions & 0 deletions acceptance/bundle/deploy/immutable/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Local = false

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can run locally as well? As long as we fix the snapshot API impl in test server?

Cloud = true

Ignore = [
"dist",
"databricks.yml",
".databricks",
".venv",
"script",
"*.pyc",
]

[[Repls]]
# Replace snapshot hash with SNAPSHOT_HASH
Old = "[0-9a-f]{64}"
New = "[SNAPSHOT_HASH]"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
bundle:
  name: my-bundle
  deployment:
    immutable_folder: true

sync:
  exclude:
    # Test framework files that are not part of the bundle source.
    - "repls.json"
    - "user_repls.json"
    - "script"
    - "*.toml"

resources:
  jobs:
    my_job:
      name: my job
      tasks:
        - task_key: my_task
          existing_cluster_id: "0101-120000-aaaaaaaa"
          spark_python_task:
            python_file: ./src/main.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions acceptance/bundle/validate/immutable_workspace_paths/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

>>> [CLI] bundle validate -o json
Warning: Pattern user_repls.json does not match any files
at sync.exclude[1]
in databricks.yml:10:7

{
"workspace": {
"artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts",
"current_user": {
"domain_friendly_name": "[USERNAME]",
"id": "[USERID]",
"short_name": "[USERNAME]",
"userName": "[USERNAME]"
},
"file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files",
"resource_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources",
"root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default",
"state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state"
},
"tasks": [
{
"existing_cluster_id": "0101-120000-aaaaaaaa",
"spark_python_task": {
"python_file": "[TEST_TMP_DIR]/src/main.py"
},
"task_key": "my_task"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}'
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("hello")
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Local = true
Cloud = false
Ignore = [".databricks"]
7 changes: 7 additions & 0 deletions bundle/config/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ type Deployment struct {
// running jobs or pipelines in the workspace. Defaults to false.
FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"`

// ImmutableFolder specifies that bundle files and artifacts are uploaded as a
// single immutable snapshot rather than being synced individually. When true,
// the deployment calls /api/2.0/repos/snapshots with a zip containing all files
// and sets workspace.file_path and workspace.artifact_path to the returned
// content-addressed path. validate and plan make no mutating API calls.
ImmutableFolder bool `json:"immutable_folder,omitempty"`

// Lock configures locking behavior on deployment.
Lock Lock `json:"lock,omitempty"`
}
8 changes: 8 additions & 0 deletions bundle/config/mutator/resolve_variable_references.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ type resolveVariableReferences struct {
includeResources bool

artifactsReferenceUsed bool

// excludePaths lists variable reference paths (e.g. "workspace.file_path") whose
// resolution should be skipped. References to these paths remain unresolved so a
// later mutator can set the value and re-run resolution.
excludePaths []string
}

func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator {
Expand Down Expand Up @@ -229,6 +234,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn

// Perform resolution only if the path starts with one of the specified prefixes.
if slices.ContainsFunc(prefixes, path.HasPrefix) {
if slices.Contains(m.excludePaths, path.String()) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very nitpicky: Make this more robust? This matches substrings - so "abc" would match "abc" and "abcd". Normally in the codebase - paths refer to exact paths, not patterns or substrings.

return dyn.InvalidValue, dynvar.ErrSkipResolution
}
value, err := m.lookupFn(normalized, path, b)
hasUpdates = hasUpdates || (err == nil && value.IsValid())
return value, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,16 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia
// we need to resolve variables because they can change path values:
// - variable can be used a prefix
// - path can be part of a complex variable value

resourceResolver := mutator.ResolveVariableReferencesOnlyResources()

bundle.ApplySeqContext(
ctx,
b,
// Reads (dynamic): * (strings) (searches for variable references in string values)
// Updates (dynamic): resources.* (strings) (resolves variable references to their actual values)
// Resolves variable references in 'resources' using bundle, workspace, and variables prefixes
mutator.ResolveVariableReferencesOnlyResources(),
resourceResolver,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this diff?

mutator.NormalizePaths(),

// Translate dashboard paths into paths in the workspace file system
Expand Down
11 changes: 7 additions & 4 deletions bundle/config/mutator/translate_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,14 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V
}

func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics {
// Set the remote root to the sync root if source-linked deployment is enabled.
// Otherwise, set it to the workspace file path.
if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) {
switch {
case b.Config.Bundle.Deployment.ImmutableFolder:
// Keep paths as local absolute paths during validate. snapshot.TranslateResourcePaths()
// replaces this local prefix with the actual snapshot path after upload.
t.remoteRoot = t.b.SyncRootPath
} else {
case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment):
t.remoteRoot = t.b.SyncRootPath
default:
t.remoteRoot = t.b.Config.Workspace.FilePath
}

Expand Down
Loading
Loading