From 0d42b74f090318b7218d4d7f3abb334b1f66b62d Mon Sep 17 00:00:00 2001 From: michaelj Date: Fri, 6 Mar 2026 14:42:57 +0000 Subject: [PATCH] feat(sql): cache SQLAlchemy engine per database URL to eliminate redundant TCP handshake Without caching, every component that calls engine_for_sql_store for the same database URL (the metastore SQLResourceStore and the samplestore SQLSampleStore being the primary pair) each calls sqlalchemy.create_engine, which creates a separate connection pool and therefore opens a separate TCP+TLS+MySQL auth connection. Over a high-latency link the full handshake can cost 1-3s. With two independent engines, both the metastore and samplestore each paid this cost, adding an additional 1-3s to every CLI command. _engine_cache (a process-level dict keyed by connection URL) ensures that the second call for the same URL returns the already-initialised Engine and reuses its warm connection pool, reducing the connection cost from 2x to 1x handshake per process lifetime. Locally measured saving on `ado show requests/results/entities`: ~2,360ms. --- orchestrator/metastore/sql/utils.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/orchestrator/metastore/sql/utils.py b/orchestrator/metastore/sql/utils.py index d93e3fb4..afd051d6 100644 --- a/orchestrator/metastore/sql/utils.py +++ b/orchestrator/metastore/sql/utils.py @@ -5,10 +5,29 @@ from orchestrator.utilities.location import SQLStoreConfiguration +# Process-level cache: reuse the same SQLAlchemy Engine (and its connection pool) +# for every call with the same database URL. This means the metastore and the +# samplestore — which both point at the same MySQL server — share one pool and +# avoid the overhead of opening a second TCP connection. 
+_engine_cache: dict[str, sqlalchemy.Engine] = {} + def engine_for_sql_store( configuration: SQLStoreConfiguration, database: str | None = None ) -> sqlalchemy.Engine: + """Return a SQLAlchemy Engine for the given store configuration. + + Engines are cached by their connection URL so that multiple components + connecting to the same database reuse a single connection pool rather than + each opening their own TCP connection. + + Args: + configuration: Database connection parameters. + database: Optional database name override. + + Returns: + A (possibly cached) SQLAlchemy Engine. + """ if configuration is None: raise ValueError("engine_for_sql_store requires a valid SQLStoreConfiguration") @@ -27,6 +46,10 @@ def engine_for_sql_store( if configuration.scheme == "sqlite" else configuration.url().unicode_string() ) + + if db_location in _engine_cache: + return _engine_cache[db_location] + engine_args: dict = {"echo": False} if configuration.scheme != "sqlite": # Prevent "Lost connection to MySQL server during query" (error 2013) when @@ -39,7 +62,10 @@ def engine_for_sql_store( # Other components on the connection also may close the connection at # other unknown intervals # engine_args["pool_recycle"] = 1800 - return sqlalchemy.create_engine(db_location, **engine_args) + + engine = sqlalchemy.create_engine(db_location, **engine_args) + _engine_cache[db_location] = engine + return engine def create_sql_resource_store(engine: sqlalchemy.Engine) -> sqlalchemy.Engine: