Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ build/
# Jupyter Notebooks
.ipynb_checkpoints/

# VS Code
.vscode/

# PyCharm
.idea/

Expand Down
6 changes: 6 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"ms-python.python",
"charliermarsh.ruff",
]
}
14 changes: 14 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"python.analysis.typeCheckingMode": "standard",
"flake8.enabled": false,
"ruff.enable": true,
"ruff.lint.enable": true,
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
}
},
}
38 changes: 38 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ tpcds_datagen = ["duckdb==1.3.1", "pyarrow>=15.0.0"]
tpch_datagen = ["duckdb==1.3.1", "pyarrow>=15.0.0"]
sparkmeasure = ["sparkmeasure==0.24.0"]
sail = ["pysail==0.3.3", "pyspark[connect]==4.0.0", "deltalake>=1.0.2", "pyarrow>=15.0.0"]
dev = ["ruff>=0.12.9"]

[project.urls]
github = "https://github.com/mwc360/LakeBench"
Expand All @@ -47,3 +48,40 @@ include-package-data = true

[tool.setuptools.packages.find]
where = ["src"]

[tool.ruff]
line-length = 120
indent-width = 4
extend-include = ["*.ipynb"]
extend-exclude = ["build", "dist", ".venv", ".github", ".vscode"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false

[tool.ruff.lint]
select = ["ALL"]
ignore = [ # https://docs.astral.sh/ruff/rules/
"COM812", # Trailing comma missing
"D100", # Missing docstring in public module
"D200", # One-line docstring should fit on one line
"D203", # 1 blank line required before class docstring
"D212", # Multi-line docstring summary should start at the first line
"EM101", # Exception must not use a string literal, assign to variable first
"EM102", # Exception must not use an f-string literal, assign to variable first
"FBT001", # Boolean-typed positional argument in function definition
"FBT002", # Boolean default positional argument in function definition
"Q003", # Change outer quotes to avoid escaping inner quotes
"SLF001", # Private member accessed: {access}
"TRY003", # Avoid specifying long messages outside the exception class
]
unfixable = [
"C405", # Unnecessary list literal (rewrite as a set literal)
"D400", # First line should end with a period
"D415", # First line should end with a period, question mark, or exclamation point
"PT027", # Use `pytest.raises` instead of unittest-style `assertRaises`
"RET504", # Unnecessary assignment to `query` before `return` statement
"SIM118", # Use `key in dict` instead of `key in dict.keys()` - VERY DANGEROUS!
"T201", # `print` found
]
16 changes: 14 additions & 2 deletions src/lakebench/engines/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,19 @@ def __init__(
cost_per_vcore_hour: Optional[float] = None,
):
"""
Initialize the DuckDB Engine Configs
Initialize the DuckDB Engine Configs.

Parameters
----------
delta_abfss_schema_path : str
TODO: what does it do?
Benchmark input data are converted from Parquet files to Delta tables.
This path is the destination where those Delta tables are saved during benchmark execution.
cost_per_vcore_hour : Optional[float]
TODO: what does it mean; what is it good for?
To compare the cost of benchmark execution across different engines, this parameter allows specifying the cost per vCore-hour.
The benchmark result table contains the cost per query.

"""
super().__init__()
import duckdb
Expand All @@ -32,7 +44,7 @@ def __init__(
self.catalog_name = None
self.schema_name = None
if self.delta_abfss_schema_path.startswith("abfss://"):
if self.is_fabric:
if self.is_fabric: # this could be moved to BaseEngine
os.environ["AZURE_STORAGE_TOKEN"] = (
self.notebookutils.credentials.getToken("storage")
)
Expand Down