-
Notifications
You must be signed in to change notification settings - Fork 184
Immutable folder support in DABs #5254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b685ec2
a429b26
e7c1968
67914d0
549492a
aedfdb0
eddec61
27a6b02
4a9bcd9
ebd26ea
5efe1da
6215a49
7b44126
be6dec3
0f1ed52
d978559
9a6c898
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| bundle: | ||
| name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME | ||
| deployment: | ||
| immutable_folder: true | ||
|
|
||
| resources: | ||
| jobs: | ||
| my_job: | ||
| name: my job | ||
| tasks: | ||
| - task_key: spark_python_task | ||
| spark_python_task: | ||
| python_file: ./src/main.py | ||
| environment_key: env | ||
| - task_key: notebook_task | ||
| notebook_task: | ||
| notebook_path: ./src/notebook.py | ||
|
|
||
| environments: | ||
| - environment_key: env | ||
| spec: | ||
| environment_version: "4" |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
|
|
||
| >>> [CLI] bundle validate | ||
| Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME] | ||
| Target: default | ||
| Workspace: | ||
| User: [USERNAME] | ||
| Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default | ||
|
|
||
| Validation OK! | ||
|
|
||
| >>> [CLI] bundle deploy | ||
| Uploading immutable bundle snapshot... | ||
| Deploying resources... | ||
| Updating deployment state... | ||
| Deployment complete! | ||
|
|
||
| >>> [CLI] jobs get [NUMID] | ||
| "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" | ||
|
|
||
| >>> [CLI] jobs get [NUMID] | ||
| "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" | ||
|
|
||
| >>> [CLI] bundle destroy --auto-approve | ||
| The following resources will be deleted: | ||
| delete resources.jobs.my_job | ||
|
|
||
| All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default | ||
|
|
||
| Deleting files... | ||
| Destroy complete! | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| envsubst < databricks.yml.tmpl > databricks.yml | ||
| cleanup() { | ||
| trace $CLI bundle destroy --auto-approve | ||
| } | ||
| trap cleanup EXIT | ||
|
|
||
| trace $CLI bundle validate | ||
| trace $CLI bundle deploy | ||
|
|
||
|
|
||
| # Get a job and check that task paths point into the snapshot | ||
| JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') | ||
| trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' | ||
| trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("Hello from Spark Python Task!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # Databricks notebook source | ||
|
|
||
| print("Hello from Notebook Task!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| Local = true | ||
| Cloud = true | ||
|
|
||
| Ignore = [ | ||
| "databricks.yml", | ||
| ".databricks", | ||
| ".venv", | ||
| "script", | ||
| "*.pyc", | ||
| ] | ||
|
|
||
| # Normalize the content-addressed snapshot hash so it doesn't need to be | ||
| # hardcoded in output.txt and the test stays stable across file changes. | ||
| [[Repls]] | ||
| Old = '[0-9a-f]{64}' | ||
| New = '[SNAPSHOT_HASH]' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| bundle: | ||
| name: test-bundle-immutable-$UNIQUE_NAME | ||
| deployment: | ||
| immutable_folder: true | ||
|
|
||
| artifacts: | ||
| python_artifact: | ||
| type: whl | ||
| build: uv build --wheel | ||
|
|
||
| resources: | ||
| jobs: | ||
| my_job: | ||
| name: my job | ||
| tasks: | ||
| - task_key: spark_python_task | ||
| spark_python_task: | ||
| python_file: ./src/main.py | ||
| environment_key: env | ||
| - task_key: notebook_task | ||
| notebook_task: | ||
| notebook_path: ./src/notebook.py | ||
| - task_key: python_wheel_task | ||
| python_wheel_task: | ||
| package_name: immutable | ||
| entry_point: main | ||
| environment_key: env | ||
| environments: | ||
| - environment_key: env | ||
| spec: | ||
| environment_version: "4" | ||
| dependencies: | ||
| - ./dist/*.whl |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
|
|
||
| >>> [CLI] bundle validate | ||
| Name: test-bundle-immutable-[UNIQUE_NAME] | ||
| Target: default | ||
| Workspace: | ||
| User: [USERNAME] | ||
| Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default | ||
|
|
||
| Validation OK! | ||
|
|
||
| >>> [CLI] bundle deploy | ||
| Building python_artifact... | ||
| Uploading immutable bundle snapshot... | ||
| Deploying resources... | ||
| Updating deployment state... | ||
| Deployment complete! | ||
|
|
||
| >>> [CLI] jobs get [NUMID] | ||
| "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/main.py" | ||
|
|
||
| >>> [CLI] jobs get [NUMID] | ||
| "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/files/src/notebook" | ||
|
|
||
| >>> [CLI] jobs get [NUMID] | ||
| [ | ||
| "/Workspace/Users/[UUID]/.snapshots/test-bundle-immutable-[UNIQUE_NAME]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" | ||
| ] | ||
|
|
||
| >>> [CLI] bundle run my_job | ||
| script: line 182: sort_lines: command not found | ||
| Run URL: [DATABRICKS_URL]/?o=[NUMID]#job/[NUMID]/run/[NUMID] | ||
|
|
||
| [TIMESTAMP] "my job" RUNNING | ||
| [TIMESTAMP] "my job" TERMINATED SUCCESS | ||
|
|
||
| >>> [CLI] bundle destroy --auto-approve | ||
| The following resources will be deleted: | ||
| delete resources.jobs.my_job | ||
|
|
||
| All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default | ||
|
|
||
| Deleting files... | ||
| Destroy complete! | ||
|
|
||
| Exit code: 127 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| [project] | ||
| name = "immutable" | ||
| version = "0.0.1" | ||
| authors = [{ name = "andrew.nester@databricks.com" }] | ||
| requires-python = ">=3.10,<3.13" | ||
| dependencies = [ | ||
| # Any dependencies for jobs and pipelines in this project can be added here | ||
| # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies | ||
| # | ||
| # LIMITATION: for pipelines, dependencies are cached during development; | ||
| # add dependencies to the 'environment' section of your pipeline.yml file instead | ||
| ] | ||
|
|
||
| [dependency-groups] | ||
| dev = [ | ||
| "pytest", | ||
| "ruff", | ||
| "databricks-dlt", | ||
| "databricks-connect>=15.4,<15.5", | ||
| "ipykernel", | ||
| ] | ||
|
|
||
| [project.scripts] | ||
| main = "immutable.main:main" | ||
|
|
||
| [build-system] | ||
| requires = ["hatchling"] | ||
| build-backend = "hatchling.build" | ||
|
|
||
| [tool.ruff] | ||
| line-length = 120 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| envsubst < databricks.yml.tmpl > databricks.yml | ||
| cleanup() { | ||
| trace $CLI bundle destroy --auto-approve | ||
| } | ||
| trap cleanup EXIT | ||
|
|
||
| trace $CLI bundle validate | ||
| trace $CLI bundle deploy | ||
|
|
||
|
|
||
| # Get a job and check that task paths are immutable | ||
| JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') | ||
| trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' | ||
| trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' | ||
| trace $CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' | ||
|
|
||
| # Sort output to make it stable | ||
| trace $CLI bundle run my_job | sort_lines |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| def main(): | ||
| print("Hello from Python Wheel Task!") | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("Hello from Spark Python Task!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # Databricks notebook source | ||
|
|
||
| print("Hello from Notebook Task!") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| Local = false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we run this locally as well, as long as we fix the snapshot API implementation in the test server? |
||
| Cloud = true | ||
|
|
||
| Ignore = [ | ||
| "dist", | ||
| "databricks.yml", | ||
| ".databricks", | ||
| ".venv", | ||
| "script", | ||
| "*.pyc", | ||
| ] | ||
|
|
||
| [[Repls]] | ||
| # Replace snapshot hash with SNAPSHOT_HASH | ||
| Old = "[0-9a-f]{64}" | ||
| New = "[SNAPSHOT_HASH]" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| bundle: | ||
| name: my-bundle | ||
| deployment: | ||
| immutable_folder: true | ||
|
|
||
| sync: | ||
| exclude: | ||
| # Test framework files that are not part of the bundle source. | ||
| - "repls.json" | ||
| - "user_repls.json" | ||
| - "script" | ||
| - "*.toml" | ||
|
|
||
| resources: | ||
| jobs: | ||
| my_job: | ||
| name: my job | ||
| tasks: | ||
| - task_key: my_task | ||
| existing_cluster_id: "0101-120000-aaaaaaaa" | ||
| spark_python_task: | ||
| python_file: ./src/main.py |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
|
|
||
| >>> [CLI] bundle validate -o json | ||
| Warning: Pattern user_repls.json does not match any files | ||
| at sync.exclude[1] | ||
| in databricks.yml:10:7 | ||
|
|
||
| { | ||
| "workspace": { | ||
| "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts", | ||
| "current_user": { | ||
| "domain_friendly_name": "[USERNAME]", | ||
| "id": "[USERID]", | ||
| "short_name": "[USERNAME]", | ||
| "userName": "[USERNAME]" | ||
| }, | ||
| "file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files", | ||
| "resource_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources", | ||
| "root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default", | ||
| "state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state" | ||
| }, | ||
| "tasks": [ | ||
| { | ||
| "existing_cluster_id": "0101-120000-aaaaaaaa", | ||
| "spark_python_task": { | ||
| "python_file": "[TEST_TMP_DIR]/src/main.py" | ||
| }, | ||
| "task_key": "my_task" | ||
| } | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| print("hello") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Local = true | ||
| Cloud = false | ||
| Ignore = [".databricks"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,6 +59,11 @@ type resolveVariableReferences struct { | |
| includeResources bool | ||
|
|
||
| artifactsReferenceUsed bool | ||
|
|
||
| // excludePaths lists variable reference paths (e.g. "workspace.file_path") whose | ||
| // resolution should be skipped. References to these paths remain unresolved so a | ||
| // later mutator can set the value and re-run resolution. | ||
| excludePaths []string | ||
| } | ||
|
|
||
| func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { | ||
|
|
@@ -229,6 +234,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn | |
|
|
||
| // Perform resolution only if the path starts with one of the specified prefixes. | ||
| if slices.ContainsFunc(prefixes, path.HasPrefix) { | ||
| if slices.Contains(m.excludePaths, path.String()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very nitpicky: can we make this more robust? This matches substrings, so "abc" would match both "abc" and "abcd". Normally in the codebase, paths refer to exact paths, not patterns or substrings. |
||
| return dyn.InvalidValue, dynvar.ErrSkipResolution | ||
| } | ||
| value, err := m.lookupFn(normalized, path, b) | ||
| hasUpdates = hasUpdates || (err == nil && value.IsValid()) | ||
| return value, err | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,13 +38,16 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia | |
| // we need to resolve variables because they can change path values: | ||
| // - variable can be used as a prefix | ||
| // - path can be part of a complex variable value | ||
|
|
||
| resourceResolver := mutator.ResolveVariableReferencesOnlyResources() | ||
|
|
||
| bundle.ApplySeqContext( | ||
| ctx, | ||
| b, | ||
| // Reads (dynamic): * (strings) (searches for variable references in string values) | ||
| // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) | ||
| // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes | ||
| mutator.ResolveVariableReferencesOnlyResources(), | ||
| resourceResolver, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why this diff? |
||
| mutator.NormalizePaths(), | ||
|
|
||
| // Translate dashboard paths into paths in the workspace file system | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we assert that destroy deletes the snapshot, even when .databricks is removed?