From 0d8e190199f411ef0edfd6796320ba87aafe426e Mon Sep 17 00:00:00 2001 From: Martin <29750255+keen85@users.noreply.github.com> Date: Tue, 19 Aug 2025 22:36:23 +0200 Subject: [PATCH 1/3] tooling: ruff --- .gitignore | 3 --- .vscode/extensions.json | 6 ++++++ .vscode/settings.json | 14 ++++++++++++++ pyproject.toml | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 5f0f621..3e33010 100644 --- a/.gitignore +++ b/.gitignore @@ -24,9 +24,6 @@ build/ # Jupyter Notebooks .ipynb_checkpoints/ -# VS Code -.vscode/ - # PyCharm .idea/ diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..37faaaf --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "ms-python.python", + "charliermarsh.ruff", + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..cbae6c5 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,14 @@ +{ + "python.analysis.typeCheckingMode": "standard", + "flake8.enabled": false, + "ruff.enable": true, + "ruff.lint.enable": true, + "[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports": "explicit" + } + }, +} diff --git a/pyproject.toml b/pyproject.toml index 5ce1c87..ea66f69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ tpcds_datagen = ["duckdb==1.3.1", "pyarrow>=15.0.0"] tpch_datagen = ["duckdb==1.3.1", "pyarrow>=15.0.0"] sparkmeasure = ["sparkmeasure==0.24.0"] sail = ["pysail==0.3.3", "pyspark[connect]==4.0.0", "deltalake>=1.0.2", "pyarrow>=15.0.0"] +dev = ["ruff>=0.12.9"] [project.urls] github = "https://github.com/mwc360/LakeBench" @@ -47,3 +48,40 @@ include-package-data = true 
[tool.setuptools.packages.find] where = ["src"] + +[tool.ruff] +line-length = 120 +indent-width = 4 +extend-include = ["*.ipynb"] +extend-exclude = ["build", "dist", ".venv", ".github", ".vscode"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false + +[tool.ruff.lint] +select = ["ALL"] +ignore = [ # https://docs.astral.sh/ruff/rules/ + "COM812", # Trailing comma missing + "D100", # Missing docstring in public module + "D200", # One-line docstring should fit on one line + "D203", # 1 blank line required before class docstring + "D212", # Multi-line docstring summary should start at the first line + "EM101", # Exception must not use a string literal, assign to variable first + "EM102", # Exception must not use an f-string literal, assign to variable first + "FBT001", # Boolean-typed positional argument in function definition + "FBT002", # Boolean default positional argument in function definition + "Q003", # Change outer quotes to avoid escaping inner quotes + "SLF001", # Private member accessed: {access} + "TRY003", # Avoid specifying long messages outside the exception class +] +unfixable = [ + "C405", # Unnecessary list literal (rewrite as a set literal) + "D400", # First line should end with a period + "D415", # First line should end with a period, question mark, or exclamation point + "PT027", # Use `pytest.raises` instead of unittest-style `assertRaises` + "RET504", # Unnecessary assignment to `query` before `return` statement + "SIM118", # Use `key in dict` instead of `key in dict.keys()` - VERY DANGEROUS! 
+ "T201", # `print` found +] \ No newline at end of file From a0e65c98f17566cd4361d39b16df5b84660ffbb7 Mon Sep 17 00:00:00 2001 From: Martin <29750255+keen85@users.noreply.github.com> Date: Tue, 19 Aug 2025 23:00:55 +0200 Subject: [PATCH 2/3] docstrings --- src/lakebench/engines/duckdb.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/lakebench/engines/duckdb.py b/src/lakebench/engines/duckdb.py index e902238..74c31ea 100644 --- a/src/lakebench/engines/duckdb.py +++ b/src/lakebench/engines/duckdb.py @@ -22,7 +22,19 @@ def __init__( cost_per_vcore_hour: Optional[float] = None, ): """ - Initialize the DuckDB Engine Configs + Initialize the DuckDB Engine Configs. + + Parameters + ---------- + delta_abfss_schema_path : str + TODO: what does it do? + Benchmark input data are converted from Parquet files to delta tables. + This path is the destination where those delta tables are saved during benchmark execution. + cost_per_vcore_hour : Optional[float] + TODO: what does it mean; what is it good for? + In order to compare costs of benchmark execution of different engines, this parameter allows specifying the cost per vCore-hour. + Benchmark result table contains the cost per query. + """ super().__init__() import duckdb From 9832a3c86c16588dd7f48099f4b53b4ab75984c5 Mon Sep 17 00:00:00 2001 From: Martin <29750255+keen85@users.noreply.github.com> Date: Tue, 19 Aug 2025 23:05:56 +0200 Subject: [PATCH 3/3] set token in BaseEngine? 
--- src/lakebench/engines/duckdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lakebench/engines/duckdb.py b/src/lakebench/engines/duckdb.py index 74c31ea..558e8b7 100644 --- a/src/lakebench/engines/duckdb.py +++ b/src/lakebench/engines/duckdb.py @@ -44,7 +44,7 @@ def __init__( self.catalog_name = None self.schema_name = None if self.delta_abfss_schema_path.startswith("abfss://"): - if self.is_fabric: + if self.is_fabric: # this could be moved to BaseEngine os.environ["AZURE_STORAGE_TOKEN"] = ( self.notebookutils.credentials.getToken("storage") )