From e7404d753f16c1cdc3b513b641c4eebd50dac33f Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Tue, 14 Apr 2026 05:54:19 +0000 Subject: [PATCH] Provide static schemas for grouped CDM Data --- .example_dotenv | 10 +- docs/api/configuration.md | 124 ++++++++++++++++++ docs/getting-started/maintenance.md | 4 + omop_alchemy/cdm/__init__.py | 41 ++++++ omop_alchemy/cdm/base/__init__.py | 18 +++ omop_alchemy/cdm/base/decorators.py | 14 ++ omop_alchemy/cdm/base/schema_mixins.py | 48 +++++++ .../model/clinical/condition_occurrence.py | 2 + omop_alchemy/cdm/model/clinical/death.py | 3 +- .../cdm/model/clinical/device_exposure.py | 2 + .../cdm/model/clinical/drug_exposure.py | 2 + .../cdm/model/clinical/measurement.py | 3 +- .../cdm/model/clinical/observation.py | 3 +- omop_alchemy/cdm/model/clinical/person.py | 3 +- .../model/clinical/procedure_occurrence.py | 3 +- omop_alchemy/cdm/model/clinical/specimen.py | 3 +- omop_alchemy/cdm/model/derived/cohort.py | 3 +- .../cdm/model/derived/cohort_definition.py | 3 +- .../cdm/model/derived/condition_era.py | 3 +- omop_alchemy/cdm/model/derived/dose_era.py | 3 +- omop_alchemy/cdm/model/derived/drug_era.py | 3 +- .../cdm/model/derived/observation_period.py | 3 +- .../cdm/model/health_economic/cost.py | 3 +- .../health_economic/payer_plan_period.py | 3 +- .../cdm/model/health_system/care_site.py | 3 +- .../cdm/model/health_system/location.py | 3 +- .../cdm/model/health_system/provider.py | 3 +- .../cdm/model/health_system/visit_detail.py | 3 +- .../model/health_system/visit_occurrence.py | 3 +- omop_alchemy/cdm/model/metadata/cdm_source.py | 3 +- omop_alchemy/cdm/model/metadata/metadata.py | 3 +- omop_alchemy/cdm/model/structural/episode.py | 3 +- .../cdm/model/structural/episode_event.py | 3 +- .../cdm/model/structural/fact_relationship.py | 3 +- omop_alchemy/cdm/model/unstructured/note.py | 3 +- .../cdm/model/unstructured/note_nlp.py | 3 +- omop_alchemy/cdm/model/vocabulary/concept.py | 2 + .../cdm/model/vocabulary/concept_ancestor.py | 3 +- .../cdm/model/vocabulary/concept_class.py | 3 +- .../model/vocabulary/concept_relationship.py | 3 +- .../cdm/model/vocabulary/concept_synonym.py | 3 +- omop_alchemy/cdm/model/vocabulary/domain.py | 3 +- .../cdm/model/vocabulary/drug_strength.py | 2 + .../cdm/model/vocabulary/relationship.py | 3 +- .../model/vocabulary/source_to_concept_map.py | 2 + .../cdm/model/vocabulary/vocabulary.py | 3 +- omop_alchemy/maintenance/analyze_tables.py | 6 +- omop_alchemy/maintenance/data_summary.py | 12 +- omop_alchemy/maintenance/foreign_keys.py | 34 ++++- omop_alchemy/maintenance/indexes.py | 21 ++- omop_alchemy/maintenance/load_vocab.py | 6 +- omop_alchemy/maintenance/reconcile.py | 29 ++-- omop_alchemy/maintenance/reset_sequences.py | 13 +- omop_alchemy/maintenance/tables.py | 51 +++++-- omop_alchemy/maintenance/truncate_tables.py | 42 ++++-- tests/test_schema_mixins.py | 42 ++++++ 56 files changed, 548 insertions(+), 78 deletions(-) create mode 100644 omop_alchemy/cdm/base/schema_mixins.py create mode 100644 tests/test_schema_mixins.py diff --git a/.example_dotenv b/.example_dotenv index 780a7dd..2d51e33 100644 --- a/.example_dotenv +++ b/.example_dotenv @@ -1,2 +1,10 @@ ENGINE=sqlite://///your/database/url/goes/here/test.db -SOURCE_PATH=/where/do/you/keep/unzipped/athena_source/files/ \ No newline at end of file +SOURCE_PATH=/where/do/you/keep/unzipped/athena_source/files/ +OMOP_CLINICAL_SCHEMA=omop +OMOP_HEALTH_SYSTEM_SCHEMA=omop +OMOP_HEALTH_ECONOMIC_SCHEMA=omop +OMOP_STRUCTURAL_SCHEMA=omop +OMOP_UNSTRUCTURED_SCHEMA=omop +OMOP_METADATA_SCHEMA=omop +OMOP_VOCABULARY_SCHEMA=vocabulary +OMOP_DERIVED_SCHEMA=results \ No newline at end of file diff --git a/docs/api/configuration.md b/docs/api/configuration.md index e69de29..1bc8b22 100644 --- a/docs/api/configuration.md +++ b/docs/api/configuration.md @@ -0,0 +1,124 @@ +# Configuration + +OMOP Alchemy resolves table schemas at import time from a small set of environment +variables. This keeps the ORM static and easy to read while still allowing a +deployment to choose its schema layout once. + +## Schema variables + +![OMOP CDM v5.4 category map](https://ohdsi.github.io/CommonDataModel/images/cdm54.png) + +The schema categories in OMOP Alchemy are based on the OMOP CDM v5.4 groupings +shown above. In practice, that means each ORM table is assigned to one of these +category buckets, and each bucket resolves to one schema variable. + +OMOP Alchemy uses these category buckets as the source of truth: + +- `clinical` +- `health_system` +- `health_economic` +- `structural` +- `unstructured` +- `metadata` +- `vocabulary` +- `derived` + +Each table belongs to one of those categories, and therefore resolves its schema +from the corresponding `OMOP_*_SCHEMA` variable. + +The table categories are grouped by default as follows: + +- `OMOP_CLINICAL_SCHEMA=omop` +- `OMOP_HEALTH_SYSTEM_SCHEMA=omop` +- `OMOP_HEALTH_ECONOMIC_SCHEMA=omop` +- `OMOP_STRUCTURAL_SCHEMA=omop` +- `OMOP_UNSTRUCTURED_SCHEMA=omop` +- `OMOP_METADATA_SCHEMA=omop` +- `OMOP_VOCABULARY_SCHEMA=vocabulary` +- `OMOP_DERIVED_SCHEMA=results` + +Variable-to-tables mapping: + +- `OMOP_CLINICAL_SCHEMA` + - `person` + - `condition_occurrence` + - `death` + - `device_exposure` + - `drug_exposure` + - `measurement` + - `observation` + - `procedure_occurrence` + - `specimen` +- `OMOP_HEALTH_SYSTEM_SCHEMA` + - `care_site` + - `location` + - `provider` + - `visit_occurrence` + - `visit_detail` +- `OMOP_HEALTH_ECONOMIC_SCHEMA` + - `cost` + - `payer_plan_period` +- `OMOP_STRUCTURAL_SCHEMA` + - `episode` + - `episode_event` + - `fact_relationship` +- `OMOP_UNSTRUCTURED_SCHEMA` + - `note` + - `note_nlp` +- `OMOP_METADATA_SCHEMA` + - `cdm_source` + - `metadata` +- `OMOP_VOCABULARY_SCHEMA` + - `concept` + - `concept_ancestor` + - `concept_class` + - `concept_relationship` + - `concept_synonym` + - `domain` + - `drug_strength` + - `relationship` + - `source_to_concept_map` + - `vocabulary` +- `OMOP_DERIVED_SCHEMA` + - `cohort` + - `cohort_definition` + - `condition_era` + - `dose_era` + - `drug_era` + - `observation_period` + +Set any of these to a different schema name to override the default. Set the value +to `none` or `null` to leave that category schema-less. + +Operational implications when changing a category schema: + +- Queries against that category will target the new schema because ORM table + objects carry the resolved schema. +- Maintenance commands will inspect/create/manage those tables in the same schema + unless `--db-schema` is explicitly provided as an override. +- Existing tables are not migrated automatically. If you change a schema variable, + move data/DDL separately or create the target tables before running workloads. + +## Import order + +Load environment variables before importing the CDM model package. The ORM classes +read their schema mixins during class construction. + +For example: + +```python +from omop_alchemy import load_environment + +load_environment(".env") + +from omop_alchemy.cdm.model.vocabulary import Concept + +print(Concept.__table__.schema) +``` + +## Engine variables + +The database engine is still resolved from the existing engine variables: + +- `ENGINE_` when `engine_schema` is provided +- `ENGINE` as the fallback \ No newline at end of file diff --git a/docs/getting-started/maintenance.md b/docs/getting-started/maintenance.md index 98b4114..73879e2 100644 --- a/docs/getting-started/maintenance.md +++ b/docs/getting-started/maintenance.md @@ -59,6 +59,10 @@ Resolution order: `engine_schema` selects the configured engine URL (`ENGINE_` or `ENGINE`). `db_schema` selects the schema inside that database. +Schema-aware ORM tables are configured from environment variables before the model +package is imported. See [Configuration](../api/configuration.md) for the schema +env vars and defaults. + --- ## Backend support at a glance diff --git a/omop_alchemy/cdm/__init__.py b/omop_alchemy/cdm/__init__.py index e69de29..8ac33c5 100644 --- a/omop_alchemy/cdm/__init__.py +++ b/omop_alchemy/cdm/__init__.py @@ -0,0 +1,41 @@ +from .base import ( + CDMTableBase, + ClinicalSchemaMixin, + DerivedSchemaMixin, + HealthEconomicSchemaMixin, + HealthSystemSchemaMixin, + MetadataSchemaMixin, + StructuralSchemaMixin, + UnstructuredSchemaMixin, + VocabularySchemaMixin, + cdm_table, + merge_table_args, + omop_index, + omop_primary_key_index_name, + omop_table_options, + optional_concept_fk, + optional_int, + required_concept_fk, + required_int, +) + +__all__ = [ + "CDMTableBase", + "ClinicalSchemaMixin", + "DerivedSchemaMixin", + "HealthEconomicSchemaMixin", + "HealthSystemSchemaMixin", + "MetadataSchemaMixin", + "StructuralSchemaMixin", + "UnstructuredSchemaMixin", + "VocabularySchemaMixin", + "cdm_table", + "merge_table_args", + "omop_index", + "omop_primary_key_index_name", + "omop_table_options", + "optional_concept_fk", + "optional_int", + "required_concept_fk", + "required_int", +] diff --git a/omop_alchemy/cdm/base/__init__.py b/omop_alchemy/cdm/base/__init__.py index fb76662..8d5cc82 100644 --- a/omop_alchemy/cdm/base/__init__.py +++ b/omop_alchemy/cdm/base/__init__.py @@ -1,5 +1,15 @@ from .cdm_table_base import CDMTableBase from .decorators import cdm_table +from .schema_mixins import ( + ClinicalSchemaMixin, + DerivedSchemaMixin, + HealthEconomicSchemaMixin, + HealthSystemSchemaMixin, + MetadataSchemaMixin, + StructuralSchemaMixin, + UnstructuredSchemaMixin, + VocabularySchemaMixin, +) from .column_helpers import required_concept_fk, optional_concept_fk, optional_int, required_int from .column_mixins import ValueMixin, ReferenceTable, DatedEvent, PersonScoped, HealthSystemContext, FactTable from .indexing import merge_table_args, omop_index, omop_primary_key_index_name, omop_table_options @@ -13,6 +23,14 @@ "ExpectedDomain", "CDMTableBase", "cdm_table", + "ClinicalSchemaMixin", + "DerivedSchemaMixin", + "HealthEconomicSchemaMixin", + "HealthSystemSchemaMixin", + "MetadataSchemaMixin", + "StructuralSchemaMixin", + "UnstructuredSchemaMixin", + "VocabularySchemaMixin", "required_concept_fk", "optional_concept_fk", "optional_int", diff --git a/omop_alchemy/cdm/base/decorators.py b/omop_alchemy/cdm/base/decorators.py index 899b496..5e3d034 100644 --- a/omop_alchemy/cdm/base/decorators.py +++ b/omop_alchemy/cdm/base/decorators.py @@ -4,6 +4,7 @@ T = TypeVar("T", bound=type) MODEL_MODULE_PREFIX = "omop_alchemy.cdm.model." + def _infer_table_category(cls: type) -> str | None: model_module = cls.__module__ if not model_module.startswith(MODEL_MODULE_PREFIX): @@ -11,6 +12,14 @@ def _infer_table_category(cls: type) -> str | None: suffix = model_module.removeprefix(MODEL_MODULE_PREFIX) return suffix.split(".", 1)[0] + +def _infer_table_schema(cls: type) -> str | None: + for base in cls.__mro__[1:]: + schema_name = getattr(base, "__omop_schema__", None) + if schema_name is not None: + return schema_name + return None + def cdm_table(cls: T) -> T: """ Mark a SQLAlchemy declarative class as a concrete OMOP CDM table. @@ -38,4 +47,9 @@ def cdm_table(cls: T) -> T: cls.__omop_is_cdm_table__ = True cls.__omop_table_category__ = _infer_table_category(cls) + schema_name = _infer_table_schema(cls) + table = getattr(cls, "__table__", None) + if schema_name is not None and table is not None and table.schema is None: + table.schema = schema_name + return cls diff --git a/omop_alchemy/cdm/base/schema_mixins.py b/omop_alchemy/cdm/base/schema_mixins.py new file mode 100644 index 0000000..5723602 --- /dev/null +++ b/omop_alchemy/cdm/base/schema_mixins.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import os + + +def _schema_from_env(env_var: str, default_schema: str | None) -> str | None: + raw_value = os.getenv(env_var) + if raw_value is None: + return default_schema + + normalized = raw_value.strip() + if not normalized: + return default_schema + if normalized.lower() in {"none", "null"}: + return None + return normalized + + +class ClinicalSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_CLINICAL_SCHEMA", "omop") + + +class DerivedSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_DERIVED_SCHEMA", "results") + + +class HealthEconomicSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_HEALTH_ECONOMIC_SCHEMA", "omop") + + +class HealthSystemSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_HEALTH_SYSTEM_SCHEMA", "omop") + + +class MetadataSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_METADATA_SCHEMA", "omop") + + +class StructuralSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_STRUCTURAL_SCHEMA", "omop") + + +class UnstructuredSchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_UNSTRUCTURED_SCHEMA", "omop") + + +class VocabularySchemaMixin: + __omop_schema__ = _schema_from_env("OMOP_VOCABULARY_SCHEMA", "vocabulary") \ No newline at end of file diff --git a/omop_alchemy/cdm/model/clinical/condition_occurrence.py b/omop_alchemy/cdm/model/clinical/condition_occurrence.py index e9160ca..e04843e 100644 --- a/omop_alchemy/cdm/model/clinical/condition_occurrence.py +++ b/omop_alchemy/cdm/model/clinical/condition_occurrence.py @@ -11,6 +11,7 @@ ReferenceContext, CDMTableBase, cdm_table, + ClinicalSchemaMixin, ModifierFieldConcepts, ModifierTargetMixin, merge_table_args, @@ -23,6 +24,7 @@ @cdm_table class Condition_Occurrence( + ClinicalSchemaMixin, PersonScoped, HealthSystemContext, CDMTableBase, diff --git a/omop_alchemy/cdm/model/clinical/death.py b/omop_alchemy/cdm/model/clinical/death.py index 85daeed..31de062 100644 --- a/omop_alchemy/cdm/model/clinical/death.py +++ b/omop_alchemy/cdm/model/clinical/death.py @@ -7,6 +7,7 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + ClinicalSchemaMixin, optional_concept_fk, ReferenceContext, DomainValidationMixin, @@ -20,7 +21,7 @@ from ..clinical import Person, PersonView @cdm_table -class Death(CDMTableBase, Base): +class Death(ClinicalSchemaMixin, CDMTableBase, Base): __tablename__ = "death" __table_args__ = merge_table_args( omop_table_options(cluster_on=omop_primary_key_index_name("death")), diff --git a/omop_alchemy/cdm/model/clinical/device_exposure.py b/omop_alchemy/cdm/model/clinical/device_exposure.py index c4862b2..ba874d8 100644 --- a/omop_alchemy/cdm/model/clinical/device_exposure.py +++ b/omop_alchemy/cdm/model/clinical/device_exposure.py @@ -10,6 +10,7 @@ FactTable, CDMTableBase, cdm_table, + ClinicalSchemaMixin, required_concept_fk, optional_concept_fk, optional_int, @@ -20,6 +21,7 @@ @cdm_table class Device_Exposure( + ClinicalSchemaMixin, PersonScoped, CDMTableBase, FactTable, diff --git a/omop_alchemy/cdm/model/clinical/drug_exposure.py b/omop_alchemy/cdm/model/clinical/drug_exposure.py index ea2d9aa..4cc3796 100644 --- a/omop_alchemy/cdm/model/clinical/drug_exposure.py +++ b/omop_alchemy/cdm/model/clinical/drug_exposure.py @@ -10,6 +10,7 @@ ReferenceContext, CDMTableBase, cdm_table, + ClinicalSchemaMixin, required_concept_fk, optional_concept_fk, optional_int, @@ -24,6 +25,7 @@ @cdm_table class Drug_Exposure( + ClinicalSchemaMixin, PersonScoped, CDMTableBase, FactTable, diff --git a/omop_alchemy/cdm/model/clinical/measurement.py b/omop_alchemy/cdm/model/clinical/measurement.py index ceeb771..9a35a7b 100644 --- a/omop_alchemy/cdm/model/clinical/measurement.py +++ b/omop_alchemy/cdm/model/clinical/measurement.py @@ -9,13 +9,14 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + ClinicalSchemaMixin, ValueMixin, merge_table_args, omop_index, ) @cdm_table -class Measurement(Base, CDMTableBase, ValueMixin): +class Measurement(ClinicalSchemaMixin, Base, CDMTableBase, ValueMixin): __tablename__ = "measurement" __table_args__ = merge_table_args( ValueMixin.__table_args__, diff --git a/omop_alchemy/cdm/model/clinical/observation.py b/omop_alchemy/cdm/model/clinical/observation.py index 0484e22..050d3ee 100644 --- a/omop_alchemy/cdm/model/clinical/observation.py +++ b/omop_alchemy/cdm/model/clinical/observation.py @@ -9,13 +9,14 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + ClinicalSchemaMixin, ValueMixin, merge_table_args, omop_index, ) @cdm_table -class Observation(Base, CDMTableBase, ValueMixin): +class Observation(ClinicalSchemaMixin, Base, CDMTableBase, ValueMixin): __tablename__ = "observation" __table_args__ = merge_table_args( ValueMixin.__table_args__, diff --git a/omop_alchemy/cdm/model/clinical/person.py b/omop_alchemy/cdm/model/clinical/person.py index df87335..3d63fbe 100644 --- a/omop_alchemy/cdm/model/clinical/person.py +++ b/omop_alchemy/cdm/model/clinical/person.py @@ -11,6 +11,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + ClinicalSchemaMixin, required_concept_fk, optional_concept_fk, required_int, @@ -31,7 +32,7 @@ from ..derived import Observation_Period @cdm_table -class Person(CDMTableBase,Base,HealthSystemContext): +class Person(ClinicalSchemaMixin, CDMTableBase,Base,HealthSystemContext): __tablename__ = "person" __table_args__ = merge_table_args( omop_index(__tablename__, "gender_concept_id"), diff --git a/omop_alchemy/cdm/model/clinical/procedure_occurrence.py b/omop_alchemy/cdm/model/clinical/procedure_occurrence.py index e3d6c07..2b71fca 100644 --- a/omop_alchemy/cdm/model/clinical/procedure_occurrence.py +++ b/omop_alchemy/cdm/model/clinical/procedure_occurrence.py @@ -7,6 +7,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + ClinicalSchemaMixin, optional_concept_fk, required_concept_fk, PersonScoped, @@ -24,7 +25,7 @@ from ..health_system import Provider, Visit_Occurrence, Visit_Detail @cdm_table -class Procedure_Occurrence(CDMTableBase, Base, PersonScoped, HealthSystemContext): +class Procedure_Occurrence(ClinicalSchemaMixin, CDMTableBase, Base, PersonScoped, HealthSystemContext): __tablename__ = "procedure_occurrence" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/clinical/specimen.py b/omop_alchemy/cdm/model/clinical/specimen.py index f33a00e..53dc257 100644 --- a/omop_alchemy/cdm/model/clinical/specimen.py +++ b/omop_alchemy/cdm/model/clinical/specimen.py @@ -6,6 +6,7 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + ClinicalSchemaMixin, required_concept_fk, optional_concept_fk, merge_table_args, @@ -13,7 +14,7 @@ ) @cdm_table -class Specimen(CDMTableBase, Base): +class Specimen(ClinicalSchemaMixin, CDMTableBase, Base): __tablename__ = "specimen" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/derived/cohort.py b/omop_alchemy/cdm/model/derived/cohort.py index 987584c..182f64d 100644 --- a/omop_alchemy/cdm/model/derived/cohort.py +++ b/omop_alchemy/cdm/model/derived/cohort.py @@ -5,10 +5,11 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, ) @cdm_table -class Cohort(CDMTableBase, Base): +class Cohort(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "cohort" cohort_definition_id: so.Mapped[int] = so.mapped_column(primary_key=True) diff --git a/omop_alchemy/cdm/model/derived/cohort_definition.py b/omop_alchemy/cdm/model/derived/cohort_definition.py index 3cd8198..8742321 100644 --- a/omop_alchemy/cdm/model/derived/cohort_definition.py +++ b/omop_alchemy/cdm/model/derived/cohort_definition.py @@ -6,12 +6,13 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Cohort_Definition(CDMTableBase, Base): +class Cohort_Definition(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "cohort_definition" __table_args__ = merge_table_args( omop_index(__tablename__, "definition_type_concept_id"), diff --git a/omop_alchemy/cdm/model/derived/condition_era.py b/omop_alchemy/cdm/model/derived/condition_era.py index 3891911..b05707f 100644 --- a/omop_alchemy/cdm/model/derived/condition_era.py +++ b/omop_alchemy/cdm/model/derived/condition_era.py @@ -6,13 +6,14 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, required_concept_fk, merge_table_args, omop_index, ) @cdm_table -class Condition_Era(CDMTableBase, Base): +class Condition_Era(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "condition_era" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/derived/dose_era.py b/omop_alchemy/cdm/model/derived/dose_era.py index 85f0975..222b559 100644 --- a/omop_alchemy/cdm/model/derived/dose_era.py +++ b/omop_alchemy/cdm/model/derived/dose_era.py @@ -5,13 +5,14 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, required_concept_fk, merge_table_args, omop_index, ) @cdm_table -class Dose_Era(CDMTableBase, Base): +class Dose_Era(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "dose_era" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/derived/drug_era.py b/omop_alchemy/cdm/model/derived/drug_era.py index 695c3ca..43a5067 100644 --- a/omop_alchemy/cdm/model/derived/drug_era.py +++ b/omop_alchemy/cdm/model/derived/drug_era.py @@ -6,13 +6,14 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, required_concept_fk, merge_table_args, omop_index, ) @cdm_table -class Drug_Era(CDMTableBase, Base): +class Drug_Era(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "drug_era" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/derived/observation_period.py b/omop_alchemy/cdm/model/derived/observation_period.py index b0d7bcc..26588e5 100644 --- a/omop_alchemy/cdm/model/derived/observation_period.py +++ b/omop_alchemy/cdm/model/derived/observation_period.py @@ -5,13 +5,14 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + DerivedSchemaMixin, required_concept_fk, merge_table_args, omop_index, ) @cdm_table -class Observation_Period(CDMTableBase, Base): +class Observation_Period(DerivedSchemaMixin, CDMTableBase, Base): __tablename__ = "observation_period" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/health_economic/cost.py b/omop_alchemy/cdm/model/health_economic/cost.py index 2abecf6..69a12bb 100644 --- a/omop_alchemy/cdm/model/health_economic/cost.py +++ b/omop_alchemy/cdm/model/health_economic/cost.py @@ -5,6 +5,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthEconomicSchemaMixin, optional_concept_fk, required_concept_fk, merge_table_args, @@ -12,7 +13,7 @@ ) @cdm_table -class Cost(CDMTableBase, Base): +class Cost(HealthEconomicSchemaMixin, CDMTableBase, Base): __tablename__ = "cost" __table_args__ = merge_table_args( omop_index(__tablename__, "cost_event_id"), diff --git a/omop_alchemy/cdm/model/health_economic/payer_plan_period.py b/omop_alchemy/cdm/model/health_economic/payer_plan_period.py index 61bd5cb..8b60ad0 100644 --- a/omop_alchemy/cdm/model/health_economic/payer_plan_period.py +++ b/omop_alchemy/cdm/model/health_economic/payer_plan_period.py @@ -6,13 +6,14 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthEconomicSchemaMixin, optional_concept_fk, merge_table_args, omop_index, ) @cdm_table -class Payer_Plan_Period(CDMTableBase, Base): +class Payer_Plan_Period(HealthEconomicSchemaMixin, CDMTableBase, Base): __tablename__ = "payer_plan_period" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/health_system/care_site.py b/omop_alchemy/cdm/model/health_system/care_site.py index ba5b868..a5f8ff4 100644 --- a/omop_alchemy/cdm/model/health_system/care_site.py +++ b/omop_alchemy/cdm/model/health_system/care_site.py @@ -6,6 +6,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthSystemSchemaMixin, optional_concept_fk, merge_table_args, omop_index, @@ -14,7 +15,7 @@ ) @cdm_table -class Care_Site(CDMTableBase, Base): +class Care_Site(HealthSystemSchemaMixin, CDMTableBase, Base): __tablename__ = "care_site" __table_args__ = merge_table_args( omop_index(__tablename__, "place_of_service_concept_id"), diff --git a/omop_alchemy/cdm/model/health_system/location.py b/omop_alchemy/cdm/model/health_system/location.py index 95e8c87..a162a67 100644 --- a/omop_alchemy/cdm/model/health_system/location.py +++ b/omop_alchemy/cdm/model/health_system/location.py @@ -6,6 +6,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthSystemSchemaMixin, optional_concept_fk, merge_table_args, omop_index, @@ -14,7 +15,7 @@ ) @cdm_table -class Location(CDMTableBase, Base): +class Location(HealthSystemSchemaMixin, CDMTableBase, Base): __tablename__ = "location" __table_args__ = merge_table_args( omop_index(__tablename__, "country_concept_id"), diff --git a/omop_alchemy/cdm/model/health_system/provider.py b/omop_alchemy/cdm/model/health_system/provider.py index f2c6975..4830317 100644 --- a/omop_alchemy/cdm/model/health_system/provider.py +++ b/omop_alchemy/cdm/model/health_system/provider.py @@ -5,6 +5,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthSystemSchemaMixin, optional_concept_fk, merge_table_args, omop_index, @@ -13,7 +14,7 @@ ) @cdm_table -class Provider(CDMTableBase, Base): +class Provider(HealthSystemSchemaMixin, CDMTableBase, Base): __tablename__ = "provider" __table_args__ = merge_table_args( omop_index(__tablename__, "specialty_concept_id"), diff --git a/omop_alchemy/cdm/model/health_system/visit_detail.py b/omop_alchemy/cdm/model/health_system/visit_detail.py index a961e4f..7c97390 100644 --- a/omop_alchemy/cdm/model/health_system/visit_detail.py +++ b/omop_alchemy/cdm/model/health_system/visit_detail.py @@ -7,6 +7,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthSystemSchemaMixin, required_concept_fk, optional_concept_fk, merge_table_args, @@ -14,7 +15,7 @@ ) @cdm_table -class Visit_Detail(CDMTableBase, Base): +class Visit_Detail(HealthSystemSchemaMixin, CDMTableBase, Base): __tablename__ = "visit_detail" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/health_system/visit_occurrence.py b/omop_alchemy/cdm/model/health_system/visit_occurrence.py index 4167219..4348d78 100644 --- a/omop_alchemy/cdm/model/health_system/visit_occurrence.py +++ b/omop_alchemy/cdm/model/health_system/visit_occurrence.py @@ -10,6 +10,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + HealthSystemSchemaMixin, ReferenceContext, required_concept_fk, optional_concept_fk, @@ -28,7 +29,7 @@ @cdm_table -class Visit_Occurrence(CDMTableBase, Base): +class Visit_Occurrence(HealthSystemSchemaMixin, CDMTableBase, Base): __tablename__ = "visit_occurrence" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/metadata/cdm_source.py b/omop_alchemy/cdm/model/metadata/cdm_source.py index c02ee60..fa3bc38 100644 --- a/omop_alchemy/cdm/model/metadata/cdm_source.py +++ b/omop_alchemy/cdm/model/metadata/cdm_source.py @@ -7,12 +7,13 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + MetadataSchemaMixin, merge_table_args, omop_index, ) @cdm_table -class CDM_Source(CDMTableBase, Base): +class CDM_Source(MetadataSchemaMixin, CDMTableBase, Base): __tablename__ = "cdm_source" __table_args__ = merge_table_args( omop_index(__tablename__, "cdm_version_concept_id") diff --git a/omop_alchemy/cdm/model/metadata/metadata.py b/omop_alchemy/cdm/model/metadata/metadata.py index 79501e9..bcbdf3c 100644 --- a/omop_alchemy/cdm/model/metadata/metadata.py +++ b/omop_alchemy/cdm/model/metadata/metadata.py @@ -7,6 +7,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + MetadataSchemaMixin, required_concept_fk, ValueMixin, merge_table_args, @@ -14,7 +15,7 @@ ) @cdm_table -class Metadata(CDMTableBase, Base, ValueMixin): +class Metadata(MetadataSchemaMixin, CDMTableBase, Base, ValueMixin): __tablename__ = "metadata" __table_args__ = merge_table_args( ValueMixin.__table_args__, diff --git a/omop_alchemy/cdm/model/structural/episode.py b/omop_alchemy/cdm/model/structural/episode.py index 0affd93..fccc012 100644 --- a/omop_alchemy/cdm/model/structural/episode.py +++ b/omop_alchemy/cdm/model/structural/episode.py @@ -7,6 +7,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + StructuralSchemaMixin, required_concept_fk, optional_concept_fk, PersonScoped, @@ -25,7 +26,7 @@ from ...base.typing import HasEpisodeId @cdm_table -class Episode(CDMTableBase, Base, PersonScoped): +class Episode(StructuralSchemaMixin, CDMTableBase, Base, PersonScoped): __tablename__ = "episode" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/structural/episode_event.py b/omop_alchemy/cdm/model/structural/episode_event.py index 2627e42..de6b84c 100644 --- a/omop_alchemy/cdm/model/structural/episode_event.py +++ b/omop_alchemy/cdm/model/structural/episode_event.py @@ -6,6 +6,7 @@ from omop_alchemy.cdm.base import ( cdm_table, CDMTableBase, + StructuralSchemaMixin, ReferenceContext, DomainValidationMixin, ExpectedDomain, @@ -18,7 +19,7 @@ from .episode import Episode @cdm_table -class Episode_Event(CDMTableBase, Base): +class Episode_Event(StructuralSchemaMixin, CDMTableBase, Base): __tablename__ = "episode_event" __table_args__ = merge_table_args( omop_index(__tablename__, "episode_id", cluster=True), diff --git a/omop_alchemy/cdm/model/structural/fact_relationship.py b/omop_alchemy/cdm/model/structural/fact_relationship.py index 309514b..728e276 100644 --- a/omop_alchemy/cdm/model/structural/fact_relationship.py +++ b/omop_alchemy/cdm/model/structural/fact_relationship.py @@ -5,12 +5,13 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + StructuralSchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Fact_Relationship(CDMTableBase, Base): +class Fact_Relationship(StructuralSchemaMixin, CDMTableBase, Base): __tablename__ = "fact_relationship" __table_args__ = merge_table_args( sa.PrimaryKeyConstraint( diff --git a/omop_alchemy/cdm/model/unstructured/note.py b/omop_alchemy/cdm/model/unstructured/note.py index 369f97c..e7e985b 100644 --- a/omop_alchemy/cdm/model/unstructured/note.py +++ b/omop_alchemy/cdm/model/unstructured/note.py @@ -8,6 +8,7 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + UnstructuredSchemaMixin, optional_concept_fk, PersonScoped, HealthSystemContext, @@ -26,7 +27,7 @@ from ..health_system import Provider, Visit_Occurrence, Visit_Detail @cdm_table -class Note(CDMTableBase, Base, PersonScoped, HealthSystemContext): +class Note(UnstructuredSchemaMixin, CDMTableBase, Base, PersonScoped, HealthSystemContext): __tablename__ = "note" __table_args__ = merge_table_args( omop_index(__tablename__, "person_id", cluster=True), diff --git a/omop_alchemy/cdm/model/unstructured/note_nlp.py b/omop_alchemy/cdm/model/unstructured/note_nlp.py index 22ca819..612b8ed 100644 --- a/omop_alchemy/cdm/model/unstructured/note_nlp.py +++ b/omop_alchemy/cdm/model/unstructured/note_nlp.py @@ -8,6 +8,7 @@ from omop_alchemy.cdm.base import ( CDMTableBase, cdm_table, + UnstructuredSchemaMixin, optional_concept_fk, merge_table_args, omop_index, @@ -18,7 +19,7 @@ @cdm_table -class Note_NLP(CDMTableBase, Base): +class Note_NLP(UnstructuredSchemaMixin, CDMTableBase, Base): __tablename__ = "note_nlp" __table_args__ = merge_table_args( omop_index(__tablename__, "note_id", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/concept.py b/omop_alchemy/cdm/model/vocabulary/concept.py index b77e4b9..491c833 100644 --- a/omop_alchemy/cdm/model/vocabulary/concept.py +++ b/omop_alchemy/cdm/model/vocabulary/concept.py @@ -16,6 +16,7 @@ cdm_table, CDMTableBase, ReferenceContext, + VocabularySchemaMixin, merge_table_args, omop_index, omop_primary_key_index_name, @@ -24,6 +25,7 @@ @cdm_table class Concept( + VocabularySchemaMixin, ReferenceTable, CDMTableBase, Base diff --git a/omop_alchemy/cdm/model/vocabulary/concept_ancestor.py b/omop_alchemy/cdm/model/vocabulary/concept_ancestor.py index d933377..c19c30f 100644 --- a/omop_alchemy/cdm/model/vocabulary/concept_ancestor.py +++ b/omop_alchemy/cdm/model/vocabulary/concept_ancestor.py @@ -5,12 +5,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Concept_Ancestor(Base, ReferenceTable, CDMTableBase): +class Concept_Ancestor(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "concept_ancestor" __table_args__ = merge_table_args( omop_index(__tablename__, "ancestor_concept_id", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/concept_class.py b/omop_alchemy/cdm/model/vocabulary/concept_class.py index 5da5410..35b85fc 100644 --- a/omop_alchemy/cdm/model/vocabulary/concept_class.py +++ b/omop_alchemy/cdm/model/vocabulary/concept_class.py @@ -5,12 +5,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Concept_Class(Base, ReferenceTable, CDMTableBase): +class Concept_Class(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "concept_class" __table_args__ = merge_table_args( omop_index(__tablename__, "concept_class_id", cluster=True) diff --git a/omop_alchemy/cdm/model/vocabulary/concept_relationship.py b/omop_alchemy/cdm/model/vocabulary/concept_relationship.py index ec2768d..09baf5c 100644 --- a/omop_alchemy/cdm/model/vocabulary/concept_relationship.py +++ b/omop_alchemy/cdm/model/vocabulary/concept_relationship.py @@ -7,12 +7,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Concept_Relationship(ReferenceTable, CDMTableBase, Base): +class Concept_Relationship(VocabularySchemaMixin, ReferenceTable, CDMTableBase, Base): __tablename__ = "concept_relationship" __table_args__ = merge_table_args( omop_index(__tablename__, "concept_id_1", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/concept_synonym.py b/omop_alchemy/cdm/model/vocabulary/concept_synonym.py index b1b9cb2..62cdff4 100644 --- a/omop_alchemy/cdm/model/vocabulary/concept_synonym.py +++ b/omop_alchemy/cdm/model/vocabulary/concept_synonym.py @@ -5,12 +5,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index, ) @cdm_table -class Concept_Synonym(Base, ReferenceTable, CDMTableBase): +class Concept_Synonym(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "concept_synonym" __table_args__ = merge_table_args( omop_index(__tablename__, "concept_id", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/domain.py b/omop_alchemy/cdm/model/vocabulary/domain.py index 8ff824a..0545064 100644 --- a/omop_alchemy/cdm/model/vocabulary/domain.py +++ b/omop_alchemy/cdm/model/vocabulary/domain.py @@ -5,12 +5,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index ) @cdm_table -class Domain(Base, ReferenceTable, CDMTableBase): +class Domain(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "domain" __table_args__ = merge_table_args( omop_index(__tablename__, "domain_id", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/drug_strength.py b/omop_alchemy/cdm/model/vocabulary/drug_strength.py index e0f8358..1535e50 100644 --- a/omop_alchemy/cdm/model/vocabulary/drug_strength.py +++ b/omop_alchemy/cdm/model/vocabulary/drug_strength.py @@ -7,12 +7,14 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index, ) @cdm_table class Drug_Strength( + VocabularySchemaMixin, CDMTableBase, ReferenceTable, Base, diff --git a/omop_alchemy/cdm/model/vocabulary/relationship.py b/omop_alchemy/cdm/model/vocabulary/relationship.py index 1b2c64d..9b22b99 100644 --- a/omop_alchemy/cdm/model/vocabulary/relationship.py +++ b/omop_alchemy/cdm/model/vocabulary/relationship.py @@ -5,12 +5,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index ) @cdm_table -class Relationship(Base, ReferenceTable, CDMTableBase): +class Relationship(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "relationship" __table_args__ = merge_table_args( omop_index(__tablename__, "relationship_id", cluster=True), diff --git a/omop_alchemy/cdm/model/vocabulary/source_to_concept_map.py b/omop_alchemy/cdm/model/vocabulary/source_to_concept_map.py index c48fe1a..ff0c723 100644 --- a/omop_alchemy/cdm/model/vocabulary/source_to_concept_map.py +++ b/omop_alchemy/cdm/model/vocabulary/source_to_concept_map.py @@ -9,6 +9,7 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, DatedEvent, merge_table_args, omop_index, @@ -16,6 +17,7 @@ @cdm_table class Source_To_Concept_Map( + VocabularySchemaMixin, DatedEvent, CDMTableBase, ReferenceTable, diff --git a/omop_alchemy/cdm/model/vocabulary/vocabulary.py b/omop_alchemy/cdm/model/vocabulary/vocabulary.py index 9ee5256..284f2bc 100644 --- a/omop_alchemy/cdm/model/vocabulary/vocabulary.py +++ b/omop_alchemy/cdm/model/vocabulary/vocabulary.py @@ -19,12 +19,13 @@ ReferenceTable, cdm_table, CDMTableBase, + VocabularySchemaMixin, merge_table_args, omop_index ) @cdm_table -class Vocabulary(Base, ReferenceTable, CDMTableBase): +class Vocabulary(VocabularySchemaMixin, Base, ReferenceTable, CDMTableBase): __tablename__ = "vocabulary" __table_args__ = merge_table_args( omop_index(__tablename__, "vocabulary_id", cluster=True), diff --git a/omop_alchemy/maintenance/analyze_tables.py b/omop_alchemy/maintenance/analyze_tables.py index 1f82413..48d8808 100644 --- a/omop_alchemy/maintenance/analyze_tables.py +++ b/omop_alchemy/maintenance/analyze_tables.py @@ -8,6 +8,7 @@ from .tables import ( TableCategory, TableScope, + maintenance_table_schema, qualified_table_name, resolve_maintenance_tables, ) @@ -64,7 +65,10 @@ def analyze_tables( with connection_factory as connection: for maintenance_table in selected_tables: - if not inspector.has_table(maintenance_table.table_name, schema=db_schema): + if not inspector.has_table( + maintenance_table.table_name, + schema=maintenance_table_schema(maintenance_table, db_schema), + ): results.append( AnalyzeTableResult( table_name=maintenance_table.table_name, diff --git a/omop_alchemy/maintenance/data_summary.py b/omop_alchemy/maintenance/data_summary.py index 272a09f..f631df2 100644 --- a/omop_alchemy/maintenance/data_summary.py +++ b/omop_alchemy/maintenance/data_summary.py @@ -4,7 +4,12 @@ import sqlalchemy as sa -from .tables import TableCategory, qualified_table_name, select_omop_tables +from .tables import ( + TableCategory, + maintenance_table_schema, + qualified_table_name, + select_omop_tables, +) @dataclass(frozen=True) @@ -29,7 +34,10 @@ def collect_data_summary( results: list[TableSummaryResult] = [] with engine.connect() as connection: for table in tables: - exists = inspector.has_table(table.table_name, schema=db_schema) + exists = inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) if not exists and existing_only: continue diff --git a/omop_alchemy/maintenance/foreign_keys.py b/omop_alchemy/maintenance/foreign_keys.py index b7b9750..585a177 100644 --- a/omop_alchemy/maintenance/foreign_keys.py +++ b/omop_alchemy/maintenance/foreign_keys.py @@ -10,6 +10,7 @@ MaintenanceTable, TableCategory, existing_maintenance_tables, + maintenance_table_schema, qualified_table_name, ) @@ -25,6 +26,7 @@ class ForeignKeyTarget: category: TableCategory model_name: str model_module: str + schema_name: str | None outgoing_constraint_count: int incoming_constraint_count: int @@ -125,8 +127,16 @@ def collect_foreign_key_targets( incoming_counts = {name: 0 for name in selected_names} outgoing_counts = {name: 0 for name in selected_names} + selected_schema_by_name = { + table.table_name: maintenance_table_schema(table, db_schema) + for table in selected_tables + } + for table_name in selected_names: - foreign_keys = inspector.get_foreign_keys(table_name, schema=db_schema) + foreign_keys = inspector.get_foreign_keys( + table_name, + schema=selected_schema_by_name[table_name], + ) relevant_foreign_keys = [ foreign_key for foreign_key in foreign_keys @@ -152,6 +162,7 @@ def collect_foreign_key_targets( category=table.category, model_name=table.model_name, model_module=table.model_module, + schema_name=table.table.schema, outgoing_constraint_count=outgoing_count, incoming_constraint_count=incoming_count, ) @@ -173,13 +184,20 @@ def _collect_strict_validation_failures( vocabulary_included=vocabulary_included, ) selected_names = {table.table_name for table in selected_tables} + selected_schema_by_name = { + table.table_name: maintenance_table_schema(table, db_schema) + for table in selected_tables + } failures: dict[str, list[ForeignKeyConstraintViolation]] = { table_name: [] for table_name in selected_names } for table_name in sorted(selected_names): - for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): + for foreign_key in inspector.get_foreign_keys( + table_name, + schema=selected_schema_by_name[table_name], + ): referred_table = foreign_key.get("referred_table") constrained_columns = foreign_key.get("constrained_columns") or [] referred_columns = foreign_key.get("referred_columns") or [] @@ -191,8 +209,14 @@ def _collect_strict_validation_failures( ): continue - source_table_name = qualified_table_name(table_name, db_schema) - referred_table_name = qualified_table_name(str(referred_table), db_schema) + source_table_name = qualified_table_name( + table_name, + selected_schema_by_name[table_name], + ) + referred_table_name = qualified_table_name( + str(referred_table), + selected_schema_by_name.get(str(referred_table)), + ) non_null_predicate = " AND ".join( f"src.{column_name} IS NOT NULL" for column_name in constrained_columns @@ -468,7 +492,7 @@ def collect_foreign_key_trigger_status( query, { "table_name": target.table_name, - "db_schema": db_schema, + "db_schema": db_schema if db_schema is not None else target.schema_name, }, ).one() diff --git a/omop_alchemy/maintenance/indexes.py b/omop_alchemy/maintenance/indexes.py index 3c7e815..2e2d7dd 100644 --- a/omop_alchemy/maintenance/indexes.py +++ b/omop_alchemy/maintenance/indexes.py @@ -11,6 +11,7 @@ from .tables import ( MaintenanceTable, TableCategory, + maintenance_table_schema, qualified_table_name, schema_adjusted_metadata, select_omop_tables, @@ -106,12 +107,18 @@ def collect_index_targets( targets: list[IndexTarget] = [] for table in selected_tables: - if not inspector.has_table(table.table_name, schema=db_schema): + if not inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ): continue existing_index_names = { index["name"] - for index in inspector.get_indexes(table.table_name, schema=db_schema) + for index in inspector.get_indexes( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) } for metadata_index in sorted(table.table.indexes, key=lambda idx: idx.name or ""): @@ -154,12 +161,18 @@ def manage_indexes( with engine.begin() as connection: for table in selected_tables: - if not inspector.has_table(table.table_name, schema=db_schema): + if not inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ): continue existing_index_names = { index["name"] - for index in inspector.get_indexes(table.table_name, schema=db_schema) + for index in inspector.get_indexes( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) } for metadata_index in sorted(table.table.indexes, key=lambda idx: idx.name or ""): diff --git a/omop_alchemy/maintenance/load_vocab.py b/omop_alchemy/maintenance/load_vocab.py index a0ea2af..8862568 100644 --- a/omop_alchemy/maintenance/load_vocab.py +++ b/omop_alchemy/maintenance/load_vocab.py @@ -25,6 +25,7 @@ from ..backend_support import Dialect, require_backend from .reset_sequences import reset_model_sequences from .tables import TableCategory, schema_adjusted_metadata, select_maintenance_tables +from .tables import maintenance_table_schema VocabularyModel: TypeAlias = type[CSVTableProtocol] VocabularyLoadProgressCallback: TypeAlias = Callable[["VocabularyLoadProgress"], None] @@ -228,7 +229,10 @@ def _create_missing_vocabulary_tables( missing_tables = [ table for table in vocab_tables - if not inspector.has_table(table.table_name, schema=db_schema) + if not inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) ] if not missing_tables: return 0 diff --git a/omop_alchemy/maintenance/reconcile.py b/omop_alchemy/maintenance/reconcile.py index ec3c514..294355a 100644 --- a/omop_alchemy/maintenance/reconcile.py +++ b/omop_alchemy/maintenance/reconcile.py @@ -6,7 +6,12 @@ from ..backend_support import Dialect from .indexes import _cluster_target_name -from .tables import MaintenanceTable, TableCategory, select_maintenance_tables +from .tables import ( + MaintenanceTable, + TableCategory, + maintenance_table_schema, + select_maintenance_tables, +) @dataclass(frozen=True) @@ -50,13 +55,13 @@ def _selected_tables( def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: - if db_schema is None: + if db_schema is None and table.schema is None: return table metadata = sa.MetaData() return table.to_metadata( metadata, - schema=db_schema, + schema=db_schema if db_schema is not None else table.schema, referred_schema_fn=( lambda _table, to_schema, _constraint, _referred_schema: to_schema ), @@ -152,7 +157,11 @@ def reconcile_schema( with engine.connect() as connection: for maintenance_table in selected_tables: table_issues: list[ReconciliationIssue] = [] - exists = inspector.has_table(maintenance_table.table_name, schema=db_schema) + schema_name = maintenance_table_schema(maintenance_table, db_schema) + exists = inspector.has_table( + maintenance_table.table_name, + schema=schema_name, + ) if not exists: table_issues.append( ReconciliationIssue( @@ -180,17 +189,17 @@ def reconcile_schema( all_issues.extend(table_issues) continue - expected_table = _schema_table(maintenance_table.table, db_schema) + expected_table = _schema_table(maintenance_table.table, schema_name) expected_columns = { column.name: column for column in expected_table.columns } actual_columns = { str(column["name"]): column - for column in inspector.get_columns(maintenance_table.table_name, schema=db_schema) + for column in inspector.get_columns(maintenance_table.table_name, schema=schema_name) } actual_pk_names = tuple( - inspector.get_pk_constraint(maintenance_table.table_name, schema=db_schema).get("constrained_columns") or [] + inspector.get_pk_constraint(maintenance_table.table_name, schema=schema_name).get("constrained_columns") or [] ) expected_pk_names = tuple(column.name for column in expected_table.primary_key.columns) @@ -277,7 +286,7 @@ def reconcile_schema( actual_foreign_keys = _actual_foreign_keys( inspector, maintenance_table.table_name, - db_schema, + schema_name, ) for signature, constraint in expected_foreign_keys.items(): @@ -316,7 +325,7 @@ def reconcile_schema( actual_indexes = _actual_indexes( inspector, maintenance_table.table_name, - db_schema, + schema_name, ) for index_name, index in expected_indexes.items(): @@ -385,7 +394,7 @@ def reconcile_schema( actual_cluster = _actual_cluster_index_name( connection, table_name=maintenance_table.table_name, - db_schema=db_schema, + db_schema=schema_name, ) if expected_cluster != actual_cluster: table_issues.append( diff --git a/omop_alchemy/maintenance/reset_sequences.py b/omop_alchemy/maintenance/reset_sequences.py index aa92b3d..3712833 100644 --- a/omop_alchemy/maintenance/reset_sequences.py +++ b/omop_alchemy/maintenance/reset_sequences.py @@ -5,7 +5,11 @@ import sqlalchemy as sa from ..backend_support import Dialect, require_backend -from .tables import TableCategory, qualified_table_name, select_omop_tables +from .tables import ( + TableCategory, + qualified_table_name, + select_omop_tables, +) @dataclass(frozen=True) @@ -14,6 +18,7 @@ class SequenceTarget: category: TableCategory model_name: str model_module: str + schema_name: str | None pk_column_name: str @@ -46,6 +51,7 @@ def collect_sequence_targets( category=table.category, model_name=table.model_name, model_module=table.model_module, + schema_name=table.table.schema, pk_column_name=pk_column_name, ) ) @@ -73,7 +79,10 @@ def reset_model_sequences( with engine.begin() as connection: for target in targets: - if not inspector.has_table(target.table_name, schema=db_schema): + if not inspector.has_table( + target.table_name, + schema=db_schema if db_schema is not None else target.schema_name, + ): continue fully_qualified_table_name = qualified_table_name(target.table_name, db_schema) diff --git a/omop_alchemy/maintenance/tables.py b/omop_alchemy/maintenance/tables.py index 407bbf0..6c59433 100644 --- a/omop_alchemy/maintenance/tables.py +++ b/omop_alchemy/maintenance/tables.py @@ -71,6 +71,13 @@ def qualified_table_name(table_name: str, db_schema: str | None) -> str: return table_name +def maintenance_table_schema( + table: MaintenanceTable, + db_schema: str | None, +) -> str | None: + return db_schema if db_schema is not None else table.table.schema + + def categories_for_scope(scope: TableScope) -> tuple[TableCategory, ...]: if scope is TableScope.ALL: return tuple(TableCategory) @@ -241,7 +248,10 @@ def existing_maintenance_tables( vocabulary_included=vocabulary_included, require_single_integer_primary_key=require_single_integer_primary_key, ) - if inspector.has_table(table.table_name, schema=db_schema) + if inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) ] @@ -254,7 +264,10 @@ def missing_maintenance_tables( return [ table for table in select_omop_tables(vocabulary_included=vocabulary_included) - if not inspector.has_table(table.table_name, schema=db_schema) + if not inspector.has_table( + table.table_name, + schema=maintenance_table_schema(table, db_schema), + ) ] @@ -263,16 +276,36 @@ def schema_adjusted_metadata( *, db_schema: str | None, ) -> tuple[sa.MetaData, dict[str, sa.Table]]: + tables = list(tables) metadata = sa.MetaData() adjusted_tables: dict[str, sa.Table] = {} + resolved_schemas = { + maintenance_table.table_name: maintenance_table_schema(maintenance_table, db_schema) + for maintenance_table in tables + } + + def referred_schema_fn( + _source_table: sa.Table, + to_schema: str | None, + constraint: sa.ForeignKeyConstraint, + referred_schema: str | None, + ) -> str | None: + referred_table_name = next(iter(constraint.elements)).column.table.name + return resolved_schemas.get(referred_table_name, to_schema or referred_schema) + for maintenance_table in tables: - adjusted_tables[maintenance_table.table_name] = maintenance_table.table.to_metadata( - metadata, - schema=db_schema, - referred_schema_fn=( - lambda _table, to_schema, _constraint, _referred_schema: to_schema - ), - ) + schema_name = resolved_schemas[maintenance_table.table_name] + if schema_name is None: + adjusted_tables[maintenance_table.table_name] = maintenance_table.table.to_metadata( + metadata, + referred_schema_fn=referred_schema_fn, + ) + else: + adjusted_tables[maintenance_table.table_name] = maintenance_table.table.to_metadata( + metadata, + schema=schema_name, + referred_schema_fn=referred_schema_fn, + ) return metadata, adjusted_tables diff --git a/omop_alchemy/maintenance/truncate_tables.py b/omop_alchemy/maintenance/truncate_tables.py index 9cd4853..4f986d2 100644 --- a/omop_alchemy/maintenance/truncate_tables.py +++ b/omop_alchemy/maintenance/truncate_tables.py @@ -8,6 +8,7 @@ from .tables import ( TableCategory, TableScope, + maintenance_table_schema, qualified_table_name, resolve_maintenance_tables, ) @@ -28,19 +29,35 @@ def _blocking_foreign_key_references( inspector: sa.Inspector, *, db_schema: str | None, - selected_table_names: set[str], + selected_tables: list, ) -> dict[str, set[str]]: blockers: dict[str, set[str]] = {} + selected_table_names = {table.table_name for table in selected_tables} + + schemas = sorted( + { + schema_name + for schema_name in ( + db_schema if db_schema is not None else maintenance_table_schema(table, None) + for table in selected_tables + ) + if schema_name is not None + } + ) - for table_name in inspector.get_table_names(schema=db_schema): - if table_name in selected_table_names: - continue + if not schemas: + schemas = [db_schema] if db_schema is not None else [None] - for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): - referred_table = foreign_key.get("referred_table") - if referred_table not in selected_table_names: + for schema_name in schemas: + for table_name in inspector.get_table_names(schema=schema_name): + if table_name in selected_table_names: continue - blockers.setdefault(str(referred_table), set()).add(table_name) + + for foreign_key in inspector.get_foreign_keys(table_name, schema=schema_name): + referred_table = foreign_key.get("referred_table") + if referred_table not in selected_table_names: + continue + blockers.setdefault(str(referred_table), set()).add(table_name) return blockers @@ -92,7 +109,10 @@ def truncate_tables( with engine.begin() as connection: for maintenance_table in selected_tables: - if not inspector.has_table(maintenance_table.table_name, schema=db_schema): + if not inspector.has_table( + maintenance_table.table_name, + schema=maintenance_table_schema(maintenance_table, db_schema), + ): results.append( TruncateTableResult( table_name=maintenance_table.table_name, @@ -132,7 +152,9 @@ def truncate_tables( blockers = _blocking_foreign_key_references( inspector, db_schema=db_schema, - selected_table_names=set(existing_tables), + selected_tables=[ + table for table in selected_tables if table.table_name in existing_tables + ], ) if blockers: raise RuntimeError(_format_blocking_reference_error(blockers)) diff --git a/tests/test_schema_mixins.py b/tests/test_schema_mixins.py new file mode 100644 index 0000000..1775c56 --- /dev/null +++ b/tests/test_schema_mixins.py @@ -0,0 +1,42 @@ +from importlib import import_module, reload + +from omop_alchemy.cdm.base import ( + ClinicalSchemaMixin, + DerivedSchemaMixin, + HealthEconomicSchemaMixin, + HealthSystemSchemaMixin, + MetadataSchemaMixin, + StructuralSchemaMixin, + UnstructuredSchemaMixin, + VocabularySchemaMixin, +) +from omop_alchemy.cdm.model.clinical.person import Person +from omop_alchemy.cdm.model.derived.cohort import Cohort +from omop_alchemy.cdm.model.metadata.metadata import Metadata +from omop_alchemy.cdm.model.vocabulary.concept import Concept + + +def test_schema_mixins_default_layout() -> None: + assert ClinicalSchemaMixin.__omop_schema__ == "omop" + assert HealthSystemSchemaMixin.__omop_schema__ == "omop" + assert HealthEconomicSchemaMixin.__omop_schema__ == "omop" + assert StructuralSchemaMixin.__omop_schema__ == "omop" + assert UnstructuredSchemaMixin.__omop_schema__ == "omop" + assert MetadataSchemaMixin.__omop_schema__ == "omop" + assert VocabularySchemaMixin.__omop_schema__ == "vocabulary" + assert DerivedSchemaMixin.__omop_schema__ == "results" + + +def test_representative_tables_have_static_schemas() -> None: + assert Person.__table__.schema == "omop" + assert Concept.__table__.schema == "vocabulary" + assert Cohort.__table__.schema == "results" + assert Metadata.__table__.schema == "omop" + + +def test_schema_mixins_can_read_environment(monkeypatch) -> None: + monkeypatch.setenv("OMOP_VOCABULARY_SCHEMA", "staging_vocab") + module = reload(import_module("omop_alchemy.cdm.base.schema_mixins")) + + assert module.VocabularySchemaMixin.__omop_schema__ == "staging_vocab" + assert module.ClinicalSchemaMixin.__omop_schema__ == "omop" \ No newline at end of file