Skip to content

Commit fcfbe8f

Browse files
Feat: Add support for configurable cache directory (#4869)
1 parent 25da4df commit fcfbe8f

File tree

17 files changed

+190
-27
lines changed

17 files changed

+190
-27
lines changed

docs/guides/configuration.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,34 @@ Conceptually, we can group the root level parameters into the following types. E
288288

289289
The rest of this page provides additional detail for some of the configuration options and provides brief examples. Comprehensive lists of configuration options are at the [configuration reference page](../reference/configuration.md).
290290

291+
### Cache directory
292+
293+
By default, the SQLMesh cache is stored in a `.cache` directory within your project folder. You can customize the cache location using the `cache_dir` configuration option:
294+
295+
=== "YAML"
296+
297+
```yaml linenums="1"
298+
# Path relative to the project directory
299+
cache_dir: my_custom_cache
300+
301+
# Alternatively, an absolute path (set `cache_dir` only once)
302+
cache_dir: /tmp/sqlmesh_cache
303+
304+
```
305+
306+
=== "Python"
307+
308+
```python linenums="1"
309+
from sqlmesh.core.config import Config, ModelDefaultsConfig
310+
311+
config = Config(
312+
model_defaults=ModelDefaultsConfig(dialect="duckdb"),
313+
cache_dir="/tmp/sqlmesh_cache",
314+
)
315+
```
316+
317+
The cache directory is automatically created if it doesn't exist. You can clear the cache using the `sqlmesh clean` command.
318+
291319
### Table/view storage locations
292320

293321
SQLMesh creates schemas, physical tables, and views in the data warehouse/engine. Learn more about why and how SQLMesh creates schemas in the ["Why does SQLMesh create schemas?" FAQ](../faq/faq.md#schema-question).

docs/reference/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Configuration options for SQLMesh project directories.
2020
| ------------------ | ------------------------------------------------------------------------------------------------------------------ | :----------: | :------: |
2121
| `ignore_patterns` | Files that match glob patterns specified in this list are ignored when scanning the project folder (Default: `[]`) | list[string] | N |
2222
| `project` | The project name of this config. Used for [multi-repo setups](../guides/multi_repo.md). | string | N |
23+
| `cache_dir` | The directory to store the SQLMesh cache. Can be an absolute path or relative to the project directory. (Default: `.cache`) | string | N |
2324

2425
### Environments
2526

sqlmesh/core/config/root.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ class Config(BaseConfig):
120120
disable_anonymized_analytics: Whether to disable the anonymized analytics collection.
121121
before_all: SQL statements or macros to be executed at the start of the `sqlmesh plan` and `sqlmesh run` commands.
122122
after_all: SQL statements or macros to be executed at the end of the `sqlmesh plan` and `sqlmesh run` commands.
123+
cache_dir: The directory to store the SQLMesh cache. Can be an absolute path or a path relative to the project folder. Defaults to .cache in the project folder.
123124
"""
124125

125126
gateways: GatewayDict = {"": GatewayConfig()}
@@ -165,6 +166,7 @@ class Config(BaseConfig):
165166
after_all: t.Optional[t.List[str]] = None
166167
linter: LinterConfig = LinterConfig()
167168
janitor: JanitorConfig = JanitorConfig()
169+
cache_dir: t.Optional[str] = None
168170

169171
_FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = {
170172
"gateways": UpdateStrategy.NESTED_UPDATE,

sqlmesh/core/config/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def create_state_sync(self, context: GenericContext) -> StateSync:
105105

106106
schema = context.config.get_state_schema(context.gateway)
107107
return EngineAdapterStateSync(
108-
engine_adapter, schema=schema, context_path=context.path, console=context.console
108+
engine_adapter, schema=schema, cache_dir=context.cache_dir, console=context.console
109109
)
110110

111111
def state_sync_fingerprint(self, context: GenericContext) -> str:

sqlmesh/core/context.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,11 @@ def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
504504
}
505505
)
506506

507-
update_model_schemas(self.dag, models=self._models, context_path=self.path)
507+
update_model_schemas(
508+
self.dag,
509+
models=self._models,
510+
cache_dir=self.cache_dir,
511+
)
508512

509513
if model.dialect:
510514
self._all_dialects.add(model.dialect)
@@ -640,7 +644,11 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]:
640644
self._models.update({fqn: model.copy(update={"mapping_schema": {}})})
641645
continue
642646

643-
update_model_schemas(self.dag, models=self._models, context_path=self.path)
647+
update_model_schemas(
648+
self.dag,
649+
models=self._models,
650+
cache_dir=self.cache_dir,
651+
)
644652

645653
models = self.models.values()
646654
for model in models:
@@ -2439,6 +2447,9 @@ def clear_caches(self) -> None:
24392447
cache_path = path / c.CACHE
24402448
if cache_path.exists():
24412449
rmtree(cache_path)
2450+
if self.cache_dir.exists():
2451+
rmtree(self.cache_dir)
2452+
24422453
if isinstance(self.state_sync, CachingStateSync):
24432454
self.state_sync.clear_cache()
24442455

@@ -2538,6 +2549,17 @@ def _model_tables(self) -> t.Dict[str, str]:
25382549
for fqn, snapshot in self.snapshots.items()
25392550
}
25402551

2552+
@cached_property
2553+
def cache_dir(self) -> Path:
2554+
if self.config.cache_dir:
2555+
cache_path = Path(self.config.cache_dir)
2556+
if cache_path.is_absolute():
2557+
return cache_path
2558+
return self.path / cache_path
2559+
2560+
# Default to .cache directory in the project path
2561+
return self.path / c.CACHE
2562+
25412563
@cached_property
25422564
def engine_adapters(self) -> t.Dict[str, EngineAdapter]:
25432565
"""Returns all the engine adapters for the gateways defined in the configuration."""
@@ -2735,6 +2757,7 @@ def _new_selector(
27352757
dag=dag,
27362758
default_catalog=self.default_catalog,
27372759
dialect=self.default_dialect,
2760+
cache_dir=self.cache_dir,
27382761
)
27392762

27402763
def _register_notification_targets(self) -> None:

sqlmesh/core/loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ class _Cache(CacheBase):
887887
def __init__(self, loader: SqlMeshLoader, config_path: Path):
888888
self._loader = loader
889889
self.config_path = config_path
890-
self._model_cache = ModelCache(self.config_path / c.CACHE)
890+
self._model_cache = ModelCache(self._loader.context.cache_dir)
891891

892892
def get_or_load_models(
893893
self, target_path: Path, loader: t.Callable[[], t.List[Model]]

sqlmesh/core/model/schema.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from sqlglot.errors import SchemaError
88
from sqlglot.schema import MappingSchema
99

10-
from sqlmesh.core import constants as c
1110
from sqlmesh.core.model.cache import (
1211
load_optimized_query_and_mapping,
1312
optimized_query_cache_pool,
@@ -23,10 +22,10 @@
2322
def update_model_schemas(
2423
dag: DAG[str],
2524
models: UniqueKeyDict[str, Model],
26-
context_path: Path,
25+
cache_dir: Path,
2726
) -> None:
2827
schema = MappingSchema(normalize=False)
29-
optimized_query_cache: OptimizedQueryCache = OptimizedQueryCache(context_path / c.CACHE)
28+
optimized_query_cache: OptimizedQueryCache = OptimizedQueryCache(cache_dir)
3029

3130
_update_model_schemas(dag, models, schema, optimized_query_cache)
3231

sqlmesh/core/selector.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from sqlglot.dialects.dialect import Dialect, DialectType
1111
from sqlglot.helper import seq_get
1212

13+
from sqlmesh.core import constants as c
1314
from sqlmesh.core.dialect import normalize_model_name
1415
from sqlmesh.core.environment import Environment
1516
from sqlmesh.core.model import update_model_schemas
@@ -34,10 +35,12 @@ def __init__(
3435
dag: t.Optional[DAG[str]] = None,
3536
default_catalog: t.Optional[str] = None,
3637
dialect: t.Optional[str] = None,
38+
cache_dir: t.Optional[Path] = None,
3739
):
3840
self._state_reader = state_reader
3941
self._models = models
4042
self._context_path = context_path
43+
self._cache_dir = cache_dir if cache_dir else context_path / c.CACHE
4144
self._default_catalog = default_catalog
4245
self._dialect = dialect
4346
self._git_client = GitClient(context_path)
@@ -157,7 +160,7 @@ def get_model(fqn: str) -> t.Optional[Model]:
157160
models[model.fqn] = model
158161

159162
if needs_update:
160-
update_model_schemas(dag, models=models, context_path=self._context_path)
163+
update_model_schemas(dag, models=models, cache_dir=self._cache_dir)
161164

162165
return models
163166

sqlmesh/core/state_sync/db/facade.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,22 +79,20 @@ class EngineAdapterStateSync(StateSync):
7979
engine_adapter: The EngineAdapter to use to store and fetch snapshots.
8080
schema: The schema to store state metadata in. If None or empty string then no schema is defined
8181
console: The console to log information to.
82-
context_path: The context path, used for caching snapshot models.
82+
cache_dir: The cache path, used for caching snapshot models.
8383
"""
8484

8585
def __init__(
8686
self,
8787
engine_adapter: EngineAdapter,
8888
schema: t.Optional[str],
8989
console: t.Optional[Console] = None,
90-
context_path: Path = Path(),
90+
cache_dir: Path = Path(),
9191
):
9292
self.plan_dags_table = exp.table_("_plan_dags", db=schema)
9393
self.interval_state = IntervalState(engine_adapter, schema=schema)
9494
self.environment_state = EnvironmentState(engine_adapter, schema=schema)
95-
self.snapshot_state = SnapshotState(
96-
engine_adapter, schema=schema, context_path=context_path
97-
)
95+
self.snapshot_state = SnapshotState(engine_adapter, schema=schema, cache_dir=cache_dir)
9896
self.version_state = VersionState(engine_adapter, schema=schema)
9997
self.migrator = StateMigrator(
10098
engine_adapter,

sqlmesh/core/state_sync/db/snapshot.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from sqlglot import exp
99
from pydantic import Field
1010

11-
from sqlmesh.core import constants as c
1211
from sqlmesh.core.engine_adapter import EngineAdapter
1312
from sqlmesh.core.state_sync.db.utils import (
1413
snapshot_name_version_filter,
@@ -53,7 +52,7 @@ def __init__(
5352
self,
5453
engine_adapter: EngineAdapter,
5554
schema: t.Optional[str] = None,
56-
context_path: Path = Path(),
55+
cache_dir: Path = Path(),
5756
):
5857
self.engine_adapter = engine_adapter
5958
self.snapshots_table = exp.table_("_snapshots", db=schema)
@@ -79,7 +78,7 @@ def __init__(
7978
"next_auto_restatement_ts": exp.DataType.build("bigint"),
8079
}
8180

82-
self._snapshot_cache = SnapshotCache(context_path / c.CACHE)
81+
self._snapshot_cache = SnapshotCache(cache_dir)
8382

8483
def push_snapshots(self, snapshots: t.Iterable[Snapshot], overwrite: bool = False) -> None:
8584
"""Pushes snapshots to the state store.

0 commit comments

Comments
 (0)