From 94aa92995de1d07e126543bbb894c38e7686470d Mon Sep 17 00:00:00 2001 From: frizzleqq Date: Sun, 10 Aug 2025 12:41:57 +0000 Subject: [PATCH 1/4] convert to notebook --- tests/notebook_run_pytest.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/notebook_run_pytest.py b/tests/notebook_run_pytest.py index 95c632b..593a293 100644 --- a/tests/notebook_run_pytest.py +++ b/tests/notebook_run_pytest.py @@ -1,14 +1,26 @@ -import sys +# Databricks notebook source +# MAGIC %pip install pytest + +# COMMAND ---------- + +dbutils.library.restartPython() +# COMMAND ---------- + +import sys import pytest -sys.dont_write_bytecode = True # Prevent writing .pyc files +# Prevent writing .pyc files +sys.dont_write_bytecode = True +# Add src to python path +sys.path.append("../src") + +# COMMAND ---------- pytest_result = pytest.main( [ ".", "-v", - "-x", ] ) From 72ca4a3be4178f4c30a494c26bb4d911a474dd47 Mon Sep 17 00:00:00 2001 From: frizzleqq Date: Sun, 10 Aug 2025 14:44:49 +0200 Subject: [PATCH 2/4] update readme --- README.md | 15 +-------------- scratch/README.md | 14 +++++++++----- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 352ad5e..39c32ac 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ The project is configured using `pyproject.toml` (Python specifics) and `databri | Directory | Description | |-----------|-------------| | `.github/workflows` | CI/CD jobs to test and deploy bundle | -| `dab_project` | Python project (Used in Databricks Workflow as Python-Wheel-Task) | +| `src/dab_project` | Python project (Used in Databricks Workflow as Python-Wheel-Task) | | `dbt` | [dbt](https://github.com/dbt-labs/dbt-core) project
* Used in Databricks Workflow as dbt-Task
* dbt-Models used from https://github.com/dbt-labs/jaffle_shop_duckdb | | `resources` | Resources such as Databricks Workflows or Databricks Volumes/Schemas
* Python-based workflow: https://docs.databricks.com/aws/en/dev-tools/bundles/python
* YAML-based Workflow: https://docs.databricks.com/aws/en/dev-tools/bundles/resources#job | | `scripts` | Python script to setup groups, service principals and catalogs used in a Databricks (Free Edition) workspace | @@ -131,19 +131,6 @@ uv run ./scripts/setup_workspace.py ## FAQ -* Why no `src` directory? - - Working in Databricks Git Repos automatically adds the root of the Git Repo to Python `sys.path`. - - This way Notebooks in the Git Repo can run `import dab_project` to import the local Python package during development without explicitly installing the package on the Cluster. - - A Notebook outside the Git Repo can do `import os; os.chdir("/Workspace/Users/...")` to act like it is within the Git Repo. - - Using a `src` directory requires changing the `sys.path` during development (without package installed) in a Databricks Git Repo. - ```python - import sys - sys.path.append("../src") - ``` * Service Principals For this example, the targets `test` and `prod` use a group and service principals. diff --git a/scratch/README.md b/scratch/README.md index 1eda061..5f2fbcc 100644 --- a/scratch/README.md +++ b/scratch/README.md @@ -1,7 +1,11 @@ -# Notebooks +# Scratch -This folder contains Databricks Notebook style files (as `.py`). +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
-They access `dab_project` Python project, so it can be used as: -* Databricks Git Repo -* Local IDE (Project installed as 'editable') \ No newline at end of file +Importing the project from the `src` directory requires appending it to `sys.path`: + +```python +import sys +sys.path.append("../src") +``` \ No newline at end of file From 97f0fbd428a8cb67b78a3af62f84c306b2b361af Mon Sep 17 00:00:00 2001 From: frizzleqq Date: Sun, 10 Aug 2025 14:53:31 +0200 Subject: [PATCH 3/4] move to src directory --- pyproject.toml | 15 +++++++++------ {dab_project => src/dab_project}/__init__.py | 0 {dab_project => src/dab_project}/__main__.py | 0 {dab_project => src/dab_project}/catalog.py | 0 {dab_project => src/dab_project}/cli.py | 0 {dab_project => src/dab_project}/databricks.py | 0 {dab_project => src/dab_project}/delta.py | 0 {dab_project => src/dab_project}/spark.py | 0 .../dab_project}/tasks/__init__.py | 0 .../dab_project}/tasks/base_task.py | 0 .../dab_project}/tasks/bronze_accuweather.py | 0 .../dab_project}/tasks/bronze_nyctaxi.py | 0 .../tasks/silver_nyctaxi_aggregated.py | 0 13 files changed, 9 insertions(+), 6 deletions(-) rename {dab_project => src/dab_project}/__init__.py (100%) rename {dab_project => src/dab_project}/__main__.py (100%) rename {dab_project => src/dab_project}/catalog.py (100%) rename {dab_project => src/dab_project}/cli.py (100%) rename {dab_project => src/dab_project}/databricks.py (100%) rename {dab_project => src/dab_project}/delta.py (100%) rename {dab_project => src/dab_project}/spark.py (100%) rename {dab_project => src/dab_project}/tasks/__init__.py (100%) rename {dab_project => src/dab_project}/tasks/base_task.py (100%) rename {dab_project => src/dab_project}/tasks/bronze_accuweather.py (100%) rename {dab_project => src/dab_project}/tasks/bronze_nyctaxi.py (100%) rename {dab_project => src/dab_project}/tasks/silver_nyctaxi_aggregated.py (100%) diff --git a/pyproject.toml b/pyproject.toml index c8221b4..47a7b32 100644 --- a/pyproject.toml +++ b/pyproject.toml 
@@ -38,10 +38,6 @@ dev = [ [project.scripts] dab-project = "dab_project.cli:main" -[tool.uv.build-backend] -module-name = "dab_project" -module-root = "" - [[tool.mypy.overrides]] ignore_missing_imports = true @@ -49,17 +45,23 @@ ignore_missing_imports = true line-length = 100 include = [ "pyproject.toml", - "dab_project/**/*.py", + "src/**/*.py", "resources/**/*.py", "scripts/**/*.py", "tests/**/*.py", ] +exclude = [ + "tests/notebook_run_pytest.py" +] [tool.ruff.format] docstring-code-format = true [tool.ruff.lint] -exclude = ["scratch/**/*.py"] +exclude = [ + "scratch/**/*.py", + "tests/notebook_run_pytest.py" +] select = [ "E", # pycodestyle "W", # pycodestyle @@ -82,6 +84,7 @@ line-length = 100 target-version = ['py312'] [tool.pytest.ini_options] +pythonpath = "src" addopts = "-ra -q" testpaths = [ "tests", diff --git a/dab_project/__init__.py b/src/dab_project/__init__.py similarity index 100% rename from dab_project/__init__.py rename to src/dab_project/__init__.py diff --git a/dab_project/__main__.py b/src/dab_project/__main__.py similarity index 100% rename from dab_project/__main__.py rename to src/dab_project/__main__.py diff --git a/dab_project/catalog.py b/src/dab_project/catalog.py similarity index 100% rename from dab_project/catalog.py rename to src/dab_project/catalog.py diff --git a/dab_project/cli.py b/src/dab_project/cli.py similarity index 100% rename from dab_project/cli.py rename to src/dab_project/cli.py diff --git a/dab_project/databricks.py b/src/dab_project/databricks.py similarity index 100% rename from dab_project/databricks.py rename to src/dab_project/databricks.py diff --git a/dab_project/delta.py b/src/dab_project/delta.py similarity index 100% rename from dab_project/delta.py rename to src/dab_project/delta.py diff --git a/dab_project/spark.py b/src/dab_project/spark.py similarity index 100% rename from dab_project/spark.py rename to src/dab_project/spark.py diff --git a/dab_project/tasks/__init__.py 
b/src/dab_project/tasks/__init__.py similarity index 100% rename from dab_project/tasks/__init__.py rename to src/dab_project/tasks/__init__.py diff --git a/dab_project/tasks/base_task.py b/src/dab_project/tasks/base_task.py similarity index 100% rename from dab_project/tasks/base_task.py rename to src/dab_project/tasks/base_task.py diff --git a/dab_project/tasks/bronze_accuweather.py b/src/dab_project/tasks/bronze_accuweather.py similarity index 100% rename from dab_project/tasks/bronze_accuweather.py rename to src/dab_project/tasks/bronze_accuweather.py diff --git a/dab_project/tasks/bronze_nyctaxi.py b/src/dab_project/tasks/bronze_nyctaxi.py similarity index 100% rename from dab_project/tasks/bronze_nyctaxi.py rename to src/dab_project/tasks/bronze_nyctaxi.py diff --git a/dab_project/tasks/silver_nyctaxi_aggregated.py b/src/dab_project/tasks/silver_nyctaxi_aggregated.py similarity index 100% rename from dab_project/tasks/silver_nyctaxi_aggregated.py rename to src/dab_project/tasks/silver_nyctaxi_aggregated.py From e479d9e42ee31b7676bd206d150ec9359a45e493 Mon Sep 17 00:00:00 2001 From: frizzleqq Date: Sun, 10 Aug 2025 14:57:00 +0200 Subject: [PATCH 4/4] fix databricks sync --- databricks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks.yml b/databricks.yml index 3f58519..da5fba5 100644 --- a/databricks.yml +++ b/databricks.yml @@ -18,9 +18,9 @@ include: sync: paths: - - dab_project/ - dbt/ - resources/ + - src/ - typings/ - databricks.yml - pyproject.toml