diff --git a/src/opensdmx/db_cache.py b/src/opensdmx/db_cache.py index d443800..bc0af92 100644 --- a/src/opensdmx/db_cache.py +++ b/src/opensdmx/db_cache.py @@ -13,18 +13,24 @@ def _get_db_path() -> Path: return get_cache_dir() / "cache.db" -_DB_INITIALIZED = False +_INITIALIZED_DBS: set[str] = set() @contextmanager def _db_conn(): """Yield a ready-to-use connection, then commit and close it.""" - global _DB_INITIALIZED - conn = sqlite3.connect(_get_db_path(), timeout=10) + db_path = _get_db_path() + db_key = str(db_path.resolve()) + db_existed = db_path.exists() + conn = sqlite3.connect(db_path, timeout=10) conn.row_factory = sqlite3.Row conn.execute("PRAGMA journal_mode=DELETE") try: - if not _DB_INITIALIZED: + # Ensure the schema exists when this DB file is new to us OR the file did + # not exist before connecting (e.g. a fresh provider cache, or the user + # deleted cache.db during a long-lived process). Keyed on the resolved + # absolute path so a relative path + CWD change can't alias two files. 
+ if not db_existed or db_key not in _INITIALIZED_DBS: conn.executescript(""" CREATE TABLE IF NOT EXISTS structure_dims ( structure_id TEXT NOT NULL, @@ -69,7 +75,7 @@ def _db_conn(): PRIMARY KEY (agency_id, df_id) ); """) - _DB_INITIALIZED = True + _INITIALIZED_DBS.add(db_key) yield conn conn.commit() except Exception: diff --git a/tests/test_db_cache.py b/tests/test_db_cache.py index 16d17f9..776fcfe 100644 --- a/tests/test_db_cache.py +++ b/tests/test_db_cache.py @@ -14,7 +14,29 @@ def _isolated_db(tmp_path, monkeypatch): """Redirect db_cache to a temporary SQLite database for every test.""" db_file = tmp_path / "test_cache.db" monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_file) - monkeypatch.setattr(db_cache, "_DB_INITIALIZED", False) + monkeypatch.setattr(db_cache, "_INITIALIZED_DBS", set()) + + +# ── cross-provider schema init (issue #42) ─────────────────────────── + +def test_schema_init_per_db_path(tmp_path, monkeypatch): + """Switching DB path (new provider) must create the schema in the new file. + + Regression for issue #42: a global init flag skipped schema creation on + the second provider, raising 'no such table: invalid_datasets'. + """ + monkeypatch.setattr(db_cache, "_INITIALIZED_DBS", set()) + db_a = tmp_path / "a" / "cache.db" + db_b = tmp_path / "b" / "cache.db" + db_a.parent.mkdir() + db_b.parent.mkdir() + + monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_a) + assert db_cache.get_invalid_dataset_ids() == set() + + # Second provider — a brand new DB file must still have the schema. 
+ monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_b) + assert db_cache.get_invalid_dataset_ids() == set() # ── structure dims ─────────────────────────────────────────────────── diff --git a/tests/test_http.py b/tests/test_http.py index 71fc851..6b0ee01 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -567,7 +567,7 @@ def _fast_retry(self, monkeypatch): from tenacity import wait_none monkeypatch.setattr("opensdmx.base.wait_exponential", lambda **kwargs: wait_none()) # Force db_cache to re-init schema for the per-test tmp cache dir. - monkeypatch.setattr("opensdmx.db_cache._DB_INITIALIZED", False) + monkeypatch.setattr("opensdmx.db_cache._INITIALIZED_DBS", set()) # Skip per-provider rate-limit sleep so multi-attempt cases stay fast. monkeypatch.setattr("opensdmx.base._rate_limit_check", lambda is_data=False: None) @@ -674,7 +674,7 @@ class TestConstraintEndpointPathBuild: def _setup(self, monkeypatch): from tenacity import wait_none monkeypatch.setattr("opensdmx.base.wait_exponential", lambda **kwargs: wait_none()) - monkeypatch.setattr("opensdmx.db_cache._DB_INITIALIZED", False) + monkeypatch.setattr("opensdmx.db_cache._INITIALIZED_DBS", set()) monkeypatch.setattr("opensdmx.base._rate_limit_check", lambda is_data=False: None) @staticmethod