From 33ae460a988f74dbbf6a18bc47ecdded4aa272fa Mon Sep 17 00:00:00 2001 From: aborruso Date: Wed, 10 Jun 2026 22:16:01 +0200 Subject: [PATCH 1/2] fix: init SQLite schema per DB path, not via global flag (#42) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The module-level _DB_INITIALIZED bool was set True after the first provider's schema creation. Switching provider opens a new DB file in a different cache dir, but the flag stayed True so the schema was never created — raising "no such table: invalid_datasets". Track initialized DB paths in a set (_INITIALIZED_DBS) so each new DB file gets its own schema. Add a cross-provider regression test. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/opensdmx/db_cache.py | 10 +++++----- tests/test_db_cache.py | 24 +++++++++++++++++++++++- tests/test_http.py | 4 ++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/opensdmx/db_cache.py b/src/opensdmx/db_cache.py index d443800..d47d759 100644 --- a/src/opensdmx/db_cache.py +++ b/src/opensdmx/db_cache.py @@ -13,18 +13,18 @@ def _get_db_path() -> Path: return get_cache_dir() / "cache.db" -_DB_INITIALIZED = False +_INITIALIZED_DBS: set[str] = set() @contextmanager def _db_conn(): """Yield a ready-to-use connection, then commit and close it.""" - global _DB_INITIALIZED - conn = sqlite3.connect(_get_db_path(), timeout=10) + db_path = _get_db_path() + conn = sqlite3.connect(db_path, timeout=10) conn.row_factory = sqlite3.Row conn.execute("PRAGMA journal_mode=DELETE") try: - if not _DB_INITIALIZED: + if str(db_path) not in _INITIALIZED_DBS: conn.executescript(""" CREATE TABLE IF NOT EXISTS structure_dims ( structure_id TEXT NOT NULL, @@ -69,7 +69,7 @@ def _db_conn(): PRIMARY KEY (agency_id, df_id) ); """) - _DB_INITIALIZED = True + _INITIALIZED_DBS.add(str(db_path)) yield conn conn.commit() except Exception: diff --git a/tests/test_db_cache.py b/tests/test_db_cache.py index 16d17f9..776fcfe 100644 --- a/tests/test_db_cache.py +++ b/tests/test_db_cache.py @@ -14,7 +14,29 @@ def _isolated_db(tmp_path, monkeypatch): """Redirect db_cache to a temporary SQLite database for every test.""" db_file = tmp_path / "test_cache.db" monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_file) - monkeypatch.setattr(db_cache, "_DB_INITIALIZED", False) + monkeypatch.setattr(db_cache, "_INITIALIZED_DBS", set()) + + +# ── cross-provider schema init (issue #42) ─────────────────────────── + +def test_schema_init_per_db_path(tmp_path, monkeypatch): + """Switching DB path (new provider) must re-create the schema. + + Regression for issue #42: a global init flag skipped schema creation on + the second provider, raising 'no such table: invalid_datasets'. + """ + monkeypatch.setattr(db_cache, "_INITIALIZED_DBS", set()) + db_a = tmp_path / "a" / "cache.db" + db_b = tmp_path / "b" / "cache.db" + db_a.parent.mkdir() + db_b.parent.mkdir() + + monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_a) + assert db_cache.get_invalid_dataset_ids() == set() + + # Second provider — a brand new DB file must still have the schema. + monkeypatch.setattr(db_cache, "_get_db_path", lambda: db_b) + assert db_cache.get_invalid_dataset_ids() == set() # ── structure dims ─────────────────────────────────────────────────── diff --git a/tests/test_http.py b/tests/test_http.py index 71fc851..6b0ee01 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -567,7 +567,7 @@ def _fast_retry(self, monkeypatch): from tenacity import wait_none monkeypatch.setattr("opensdmx.base.wait_exponential", lambda **kwargs: wait_none()) # Force db_cache to re-init schema for the per-test tmp cache dir. - monkeypatch.setattr("opensdmx.db_cache._DB_INITIALIZED", False) + monkeypatch.setattr("opensdmx.db_cache._INITIALIZED_DBS", set()) # Skip per-provider rate-limit sleep so multi-attempt cases stay fast. monkeypatch.setattr("opensdmx.base._rate_limit_check", lambda is_data=False: None) @@ -674,7 +674,7 @@ class TestConstraintEndpointPathBuild: def _setup(self, monkeypatch): from tenacity import wait_none monkeypatch.setattr("opensdmx.base.wait_exponential", lambda **kwargs: wait_none()) - monkeypatch.setattr("opensdmx.db_cache._DB_INITIALIZED", False) + monkeypatch.setattr("opensdmx.db_cache._INITIALIZED_DBS", set()) monkeypatch.setattr("opensdmx.base._rate_limit_check", lambda is_data=False: None) @staticmethod From 85ddc25a48aed56f2d55f0a1d9cce50a5d8362d4 Mon Sep 17 00:00:00 2001 From: aborruso Date: Wed, 10 Jun 2026 22:20:43 +0200 Subject: [PATCH 2/2] fix: harden DB schema guard (resolve path, reinit on missing file) Address Copilot review on #43: - key _INITIALIZED_DBS on the resolved absolute path so a relative OPENSDMX_CACHE_DIR + CWD change can't alias two different DB files - re-create the schema when the DB file did not exist before connecting, covering cache.db deleted during a long-lived process Co-Authored-By: Claude Opus 4.8 (1M context) --- src/opensdmx/db_cache.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/opensdmx/db_cache.py b/src/opensdmx/db_cache.py index d47d759..bc0af92 100644 --- a/src/opensdmx/db_cache.py +++ b/src/opensdmx/db_cache.py @@ -20,11 +20,17 @@ def _get_db_path() -> Path: def _db_conn(): """Yield a ready-to-use connection, then commit and close it.""" db_path = _get_db_path() + db_key = str(db_path.resolve()) + db_existed = db_path.exists() conn = sqlite3.connect(db_path, timeout=10) conn.row_factory = sqlite3.Row conn.execute("PRAGMA journal_mode=DELETE") try: - if str(db_path) not in _INITIALIZED_DBS: + # Re-create the schema when this DB file is new to us OR the file did + # not exist before connecting (e.g. a fresh provider cache, or the user + # deleted cache.db during a long-lived process). Keyed on the resolved + # absolute path so a relative path + CWD change can't alias two files. + if not db_existed or db_key not in _INITIALIZED_DBS: conn.executescript(""" CREATE TABLE IF NOT EXISTS structure_dims ( structure_id TEXT NOT NULL, @@ -69,7 +75,7 @@ def _db_conn(): PRIMARY KEY (agency_id, df_id) ); """) - _INITIALIZED_DBS.add(str(db_path)) + _INITIALIZED_DBS.add(db_key) yield conn conn.commit() except Exception: