Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
enable-cache: true
- uses: databricks/setup-cli@main
with:
version: 0.279.0
version: 0.280.0
- name: Install the project
run: uv sync --locked --all-extras
- name: Install Databricks Connect
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
enable-cache: true
- uses: databricks/setup-cli@main
with:
version: 0.279.0
version: 0.280.0
- name: Install the project
run: uv sync --locked --all-extras
- name: Check Databricks CLI
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
enable-cache: true
- uses: databricks/setup-cli@main
with:
version: 0.279.0
version: 0.280.0
- name: Install the project
run: uv sync --locked --all-extras
- name: Check Databricks CLI
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/validate-bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
enable-cache: true
- uses: databricks/setup-cli@main
with:
version: 0.279.0
version: 0.280.0
- name: Install the project
run: uv sync --locked --all-extras
- name: Check Databricks CLI
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ A script exists to set up the (Free) Workspace as described in [scripts/setup_works
* uv: https://docs.astral.sh/uv/getting-started/installation/
* `uv` will default to Python version specified in [.python-version](.python-version)
* Databricks CLI: https://docs.databricks.com/aws/en/dev-tools/cli/install
* ">=0.259.0" for Python based workflows with `environment_version`
* ">=0.269.0" for resources that use `lifecycle.prevent_destroy`

### Setup environment

Expand Down Expand Up @@ -156,7 +156,6 @@ uv run ./scripts/setup_workspace.py

## TODO:

* Resources (volume, schema, permissions)
* Streaming example
* Logging
* Logging to volume
27 changes: 19 additions & 8 deletions databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,21 @@
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: dab_project
databricks_cli_version: ">=0.259.0"
databricks_cli_version: ">=0.269.0"

variables:
catalog_name:
description: The name of the catalog to use for this bundle.
description: The name of the catalog to use for this bundle (set in targets).
service_principal_id:
description: The service principal ID to use for running jobs (auto-lookup).
serverless_environment_version:
description: The serverless environment to use for this bundle.
default: "4"
dbt_sql_warehouse_id:
description: The ID of the SQL warehouse to use for dbt tasks.
description: The ID of the SQL warehouse to use for dbt tasks (auto-lookup).
# Look up the warehouse ID by name.
lookup:
warehouse: "Serverless Starter Warehouse"

include:
- resources/*.yml
Expand Down Expand Up @@ -67,7 +72,7 @@ targets:
trigger_pause_status: PAUSED
variables:
catalog_name: "lake_dev"
dbt_sql_warehouse_id: "c31436beb8eee93c"
service_principal_id: "unused for dev target"
workspace:
host: https://dbc-ea4bb8e5-6935.cloud.databricks.com
root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}
Expand All @@ -79,22 +84,28 @@ targets:
trigger_pause_status: UNPAUSED
variables:
catalog_name: "lake_test"
dbt_sql_warehouse_id: "c31436beb8eee93c"
service_principal_id:
# Look up the dev service principal ID by name.
lookup:
service_principal: "sp_etl_dev"
workspace:
host: https://dbc-ea4bb8e5-6935.cloud.databricks.com
root_path: /Workspace/bundle/${bundle.name}/${bundle.target}
run_as:
service_principal_name: 280a0e2e-369a-440f-8bf1-8da8c975e077
service_principal_name: ${var.service_principal_id}

prod:
mode: production
presets:
trigger_pause_status: UNPAUSED
variables:
catalog_name: "lake_prod"
dbt_sql_warehouse_id: "c31436beb8eee93c"
service_principal_id:
# Look up the prod service principal ID by name.
lookup:
service_principal: "sp_etl_prod"
workspace:
host: https://dbc-ea4bb8e5-6935.cloud.databricks.com
root_path: /Workspace/bundle/${bundle.name}/${bundle.target}
run_as:
service_principal_name: 255b38e1-a8ec-40cf-8e27-e640276bef5d
service_principal_name: ${var.service_principal_id}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dbt = [
]
# Development & Testing
dev = [
"databricks-bundles==0.279.*", # For Python-based Workflows
"databricks-bundles==0.280.*", # For Python-based Workflows
"mypy", # Type hints
"pip", # Databricks extension needs it
"pytest", # Unit testing
Expand Down
33 changes: 33 additions & 0 deletions resources/schemas.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
resources:
schemas:
# The pipeline would create schemas automatically, so this is more of a proof of concept
silver_schema:
name: silver
catalog_name: ${var.catalog_name}
comment: "Silver schema for cleaned and processed data"
lifecycle:
prevent_destroy: true
grants:
- principal: group_etl
privileges:
- ALL_PRIVILEGES
- principal: group_reader
privileges:
- USE_SCHEMA
- SELECT

gold_schema:
name: gold
catalog_name: ${var.catalog_name}
comment: "Gold schema for aggregated and business-ready data"
lifecycle:
prevent_destroy: true
grants:
- principal: group_etl
privileges:
- ALL_PRIVILEGES
- principal: group_reader
privileges:
- USE_SCHEMA
- SELECT

37 changes: 28 additions & 9 deletions resources/volumes.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
resources:
volumes:
checkpoints_volume:
# These volumes are unused and exist only as a proof of concept.
silver_checkpoints_volume:
name: checkpoints
catalog_name: ${var.catalog_name}
schema_name: default
# grants:
# - privileges:
# - ALL PRIVILEGES
# principal: group_etl
# - privileges:
# - READ VOLUME
# principal: group_reader
schema_name: silver
comment: "Silver volume for Spark Streaming checkpoints"
lifecycle:
prevent_destroy: true
grants:
- principal: group_etl
privileges:
- ALL_PRIVILEGES
- principal: group_reader
privileges:
- READ_VOLUME

gold_checkpoints_volume:
name: checkpoints
catalog_name: ${var.catalog_name}
schema_name: gold
comment: "Gold volume for Spark Streaming checkpoints"
lifecycle:
prevent_destroy: true
grants:
- principal: group_etl
privileges:
- ALL_PRIVILEGES
- principal: group_reader
privileges:
- READ_VOLUME
Loading