From a1ab4f287407af316f0bc362334f362e0549934b Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Tue, 10 Feb 2026 15:52:43 +0100 Subject: [PATCH 01/24] add alembic migration --- alembic.ini | 152 ++++++++++++++++++ alembic/env.py | 67 ++++++++ alembic/script.py.mako | 30 ++++ .../c84b85ae4bed_initial_migration.py | 92 +++++++++++ dev_requirements.txt | 1 + requirements.txt | 1 + simdb.db | Bin 0 -> 53248 bytes 7 files changed, 343 insertions(+) create mode 100644 alembic.ini create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/c84b85ae4bed_initial_migration.py create mode 100644 simdb.db diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 00000000..5aca437c --- /dev/null +++ b/alembic.ini @@ -0,0 +1,152 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = %(here)s/alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s +# Or organize into date-based subdirectories (requires recursive_version_locations = true) +# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . + + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the tzdata library which can be installed by adding +# `alembic[tz]` to the pip requirements. +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to /versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. +# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. +# NOTE: The actual database URL should be set via the DATABASE_URL environment variable +# or it can be set here for development purposes +# Example: postgresql+psycopg2://user:password@localhost/dbname +sqlalchemy.url = sqlite:///simdb.db + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 00000000..ccfed6f4 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,67 @@ +from logging.config import fileConfig +import os +import sys +from pathlib import Path + +from alembic import context +from sqlalchemy import create_engine, pool + +config = context.config + +if config.config_file_name: + fileConfig(config.config_file_name) + +SRC_PATH = Path(__file__).resolve().parents[1] / "src" +sys.path.insert(0, str(SRC_PATH)) + +from simdb.database.models import Base # noqa +from simdb.database.models import ( # noqa + file, + metadata, + simulation, + watcher, +) +from simdb.database.models import types # noqa + +target_metadata = Base.metadata + +def get_database_url() -> str: + url = os.getenv("DATABASE_URL") + if not url: + raise RuntimeError("DATABASE_URL is not set") + return url + +def run_migrations_offline() -> None: + context.configure( + url=get_database_url(), + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + compare_type=True, + compare_server_default=True, + ) + + with context.begin_transaction(): + context.run_migrations() + +def run_migrations_online() -> None: + engine = create_engine( + get_database_url(), + poolclass=pool.NullPool, + ) + + with engine.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + compare_type=True, + compare_server_default=True, + ) + + with context.begin_transaction(): + context.run_migrations() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 00000000..c0865ef6 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,30 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from simdb.database.models import types + +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/c84b85ae4bed_initial_migration.py b/alembic/versions/c84b85ae4bed_initial_migration.py new file mode 100644 index 00000000..d992f3bd --- /dev/null +++ b/alembic/versions/c84b85ae4bed_initial_migration.py @@ -0,0 +1,92 @@ +"""initial migration + +Revision ID: c84b85ae4bed +Revises: +Create Date: 2026-02-10 15:24:11.323077 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from simdb.database.models.types import UUID, ChoiceType +from simdb.notifications import Notification + +# revision identifiers, used by Alembic. +revision: str = 'c84b85ae4bed' +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +# Define notification choices +NOTIFICATION_CHOICES = { + Notification.VALIDATION: "V", + Notification.REVISION: "R", + Notification.OBSOLESCENCE: "O", + Notification.ALL: "A", +} + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('simulations', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('uuid', UUID(), nullable=False), + sa.Column('alias', sa.String(length=250), nullable=True), + sa.Column('datetime', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) + op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) + op.create_table('watchers', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('username', sa.String(length=250), nullable=True), + sa.Column('email', sa.String(length=1000), nullable=True), + sa.Column('notification', ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('sim_id', sa.Integer(), nullable=True), + sa.Column('element', sa.String(length=250), nullable=False), + sa.Column('value', sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) + op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) + op.create_table('simulation_input_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + op.create_table('simulation_output_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + op.create_table('simulation_watchers', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('watcher_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), + sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('simulation_watchers') + op.drop_table('simulation_output_files') + op.drop_table('simulation_input_files') + op.drop_index('metadata_index', table_name='metadata') + op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') + op.drop_table('metadata') + op.drop_table('watchers') + op.drop_index(op.f('ix_simulations_uuid'), table_name='simulations') + op.drop_index(op.f('ix_simulations_alias'), table_name='simulations') + op.drop_table('simulations') + # ### end Alembic commands ### diff --git a/dev_requirements.txt b/dev_requirements.txt index 6fc6c7fc..031eeb99 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -5,6 +5,7 @@ numpy>=1.14.0 python-dateutil>=2.6 PyYAML>=3.13 SQLAlchemy~=1.4 +alembic~=1.13 urllib3~=1.23 requests~=2.27 pytest~=6.0 diff --git a/requirements.txt b/requirements.txt index 36993c4c..26bef65d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ numpy>=1.14.0 python-dateutil>=2.6 PyYAML>=3.13 SQLAlchemy~=1.4 +alembic~=1.13 urllib3~=1.23 requests~=2.27 pytest>=6.0 diff --git a/simdb.db b/simdb.db new file mode 100644 index 0000000000000000000000000000000000000000..8b2bf6941c3c42559345fda3d7e2473f60f949d1 GIT binary patch literal 53248 zcmeI(U2oe|7{GD6?ds-j*`2Q{9aK%k>V(>5MVkf^Y3G$dmQXhZwj0Y$T+LcuMq-a~ zfl#_n!pGoq@FBS6k{cwBow&8#q;QF>ztt-7iOs(&fvtyfC5QvL02xAI5j=ko8R zze~TCJ}Vv)^r`wm%?3RV{I`K`UwD!tF^Xhy zc~$VqgKh-~TfZgx#mA`tq5&<9I5A4;ahsKlds5W&Cr{MNU$Sv~qh(5BGQs}-Iv^C2bA$VC+5+lF=YWy7jHJlHc*Ro*#+z!itB&SBWVH&*jS z!+IH>s;j1?#QC?m`0?gEs*j?m;xjaU)lyN#rME;COx?-I8M>(;!qK`z#~WOg?(gmG zMOTf{*!M2HUUaP!QPlfUc{aGB@%oOuLhh(DEl%$BDL1u)!0RXWHSaB1F-Uf@kxD~q z#4G1l7|L9<{xCg_=EF(OvM@pNbaZ`BX6~81r*xYy%+%DonU9&FYWnW3dRd4D?#NT{ z-0Ruz Date: Tue, 10 Feb 2026 16:13:17 +0100 Subject: [PATCH 02/24] add alembic to pyproject, delete db and adjust migration --- .../c84b85ae4bed_initial_migration.py | 111 ++++++++++-------- pyproject.toml | 1 + simdb.db | Bin 53248 -> 0 bytes 3 files changed, 66 insertions(+), 46 deletions(-) delete mode 100644 simdb.db diff --git a/alembic/versions/c84b85ae4bed_initial_migration.py b/alembic/versions/c84b85ae4bed_initial_migration.py index d992f3bd..e0193b1d 100644 --- a/alembic/versions/c84b85ae4bed_initial_migration.py +++ b/alembic/versions/c84b85ae4bed_initial_migration.py @@ -28,52 +28,71 @@ def upgrade() -> None: """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('simulations', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('uuid', UUID(), nullable=False), - sa.Column('alias', sa.String(length=250), nullable=True), - sa.Column('datetime', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) - op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) - op.create_table('watchers', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('username', sa.String(length=250), nullable=True), - sa.Column('email', sa.String(length=1000), nullable=True), - sa.Column('notification', ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table('metadata', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('sim_id', sa.Integer(), nullable=True), - sa.Column('element', sa.String(length=250), nullable=False), - sa.Column('value', sa.PickleType(), nullable=True), - sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), - sa.PrimaryKeyConstraint('id') - ) - op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) - op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) - op.create_table('simulation_input_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) - ) - op.create_table('simulation_output_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) - ) - op.create_table('simulation_watchers', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('watcher_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), - sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) - ) - # ### end Alembic commands ### + # Get connection to check for existing tables + conn = op.get_bind() + inspector = sa.inspect(conn) + existing_tables = inspector.get_table_names() + # Create simulations table if it doesn't exist + if 'simulations' not in existing_tables: + op.create_table('simulations', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('uuid', UUID(), nullable=False), + sa.Column('alias', sa.String(length=250), nullable=True), + sa.Column('datetime', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) + op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) + + # Create watchers table if it doesn't exist + if 'watchers' not in existing_tables: + op.create_table('watchers', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('username', sa.String(length=250), nullable=True), + sa.Column('email', sa.String(length=1000), nullable=True), + sa.Column('notification', ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + + # Create metadata table if it doesn't exist + if 'metadata' not in existing_tables: + op.create_table('metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('sim_id', sa.Integer(), nullable=True), + sa.Column('element', sa.String(length=250), nullable=False), + sa.Column('value', sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) + op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) + + # Create simulation_input_files table if it doesn't exist + if 'simulation_input_files' not in existing_tables: + op.create_table('simulation_input_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + + # Create simulation_output_files table if it doesn't exist + if 'simulation_output_files' not in existing_tables: + op.create_table('simulation_output_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + + # Create simulation_watchers table if it doesn't exist + if 'simulation_watchers' not in existing_tables: + op.create_table('simulation_watchers', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('watcher_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), + sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) + ) def downgrade() -> None: diff --git a/pyproject.toml b/pyproject.toml index 344e3e55..37032bee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ "requests>=2.27.0", "semantic-version>=2.8", "sqlalchemy>=1.2.12,<2.0", + "alembic~=1.13", "urllib3>=1.26", ] diff --git a/simdb.db b/simdb.db deleted file mode 100644 index 8b2bf6941c3c42559345fda3d7e2473f60f949d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI(U2oe|7{GD6?ds-j*`2Q{9aK%k>V(>5MVkf^Y3G$dmQXhZwj0Y$T+LcuMq-a~ zfl#_n!pGoq@FBS6k{cwBow&8#q;QF>ztt-7iOs(&fvtyfC5QvL02xAI5j=ko8R zze~TCJ}Vv)^r`wm%?3RV{I`K`UwD!tF^Xhy zc~$VqgKh-~TfZgx#mA`tq5&<9I5A4;ahsKlds5W&Cr{MNU$Sv~qh(5BGQs}-Iv^C2bA$VC+5+lF=YWy7jHJlHc*Ro*#+z!itB&SBWVH&*jS z!+IH>s;j1?#QC?m`0?gEs*j?m;xjaU)lyN#rME;COx?-I8M>(;!qK`z#~WOg?(gmG zMOTf{*!M2HUUaP!QPlfUc{aGB@%oOuLhh(DEl%$BDL1u)!0RXWHSaB1F-Uf@kxD~q z#4G1l7|L9<{xCg_=EF(OvM@pNbaZ`BX6~81r*xYy%+%DonU9&FYWnW3dRd4D?#NT{ z-0Ruz Date: Wed, 11 Feb 2026 13:11:17 +0100 Subject: [PATCH 03/24] format --- alembic/env.py | 4 + .../c84b85ae4bed_initial_migration.py | 165 +++++++++++------- 2 files changed, 108 insertions(+), 61 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index ccfed6f4..3e64a98a 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -25,12 +25,14 @@ target_metadata = Base.metadata + def get_database_url() -> str: url = os.getenv("DATABASE_URL") if not url: raise RuntimeError("DATABASE_URL is not set") return url + def run_migrations_offline() -> None: context.configure( url=get_database_url(), @@ -44,6 +46,7 @@ def run_migrations_offline() -> None: with context.begin_transaction(): context.run_migrations() + def run_migrations_online() -> None: engine = create_engine( get_database_url(), @@ -61,6 +64,7 @@ def run_migrations_online() -> None: with context.begin_transaction(): context.run_migrations() + if context.is_offline_mode(): run_migrations_offline() else: diff --git a/alembic/versions/c84b85ae4bed_initial_migration.py b/alembic/versions/c84b85ae4bed_initial_migration.py index e0193b1d..24425c7d 100644 --- a/alembic/versions/c84b85ae4bed_initial_migration.py +++ b/alembic/versions/c84b85ae4bed_initial_migration.py @@ -1,10 +1,11 @@ """initial migration Revision ID: c84b85ae4bed -Revises: +Revises: Create Date: 2026-02-10 15:24:11.323077 """ + from typing import Sequence, Union from alembic import op @@ -13,7 +14,7 @@ from simdb.notifications import Notification # revision identifiers, used by Alembic. -revision: str = 'c84b85ae4bed' +revision: str = "c84b85ae4bed" down_revision: Union[str, Sequence[str], None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -26,6 +27,7 @@ Notification.ALL: "A", } + def upgrade() -> None: """Upgrade schema.""" # Get connection to check for existing tables @@ -33,79 +35,120 @@ def upgrade() -> None: inspector = sa.inspect(conn) existing_tables = inspector.get_table_names() # Create simulations table if it doesn't exist - if 'simulations' not in existing_tables: - op.create_table('simulations', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('uuid', UUID(), nullable=False), - sa.Column('alias', sa.String(length=250), nullable=True), - sa.Column('datetime', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id') + if "simulations" not in existing_tables: + op.create_table( + "simulations", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("uuid", UUID(), nullable=False), + sa.Column("alias", sa.String(length=250), nullable=True), + sa.Column("datetime", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_simulations_alias"), "simulations", ["alias"], unique=True ) - op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) - op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) - + op.create_index( + op.f("ix_simulations_uuid"), "simulations", ["uuid"], unique=True + ) + # Create watchers table if it doesn't exist - if 'watchers' not in existing_tables: - op.create_table('watchers', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('username', sa.String(length=250), nullable=True), - sa.Column('email', sa.String(length=1000), nullable=True), - sa.Column('notification', ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), nullable=True), - sa.PrimaryKeyConstraint('id') + if "watchers" not in existing_tables: + op.create_table( + "watchers", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("username", sa.String(length=250), nullable=True), + sa.Column("email", sa.String(length=1000), nullable=True), + sa.Column( + "notification", + ChoiceType( + choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification + ), + nullable=True, + ), + sa.PrimaryKeyConstraint("id"), ) - + # Create metadata table if it doesn't exist - if 'metadata' not in existing_tables: - op.create_table('metadata', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('sim_id', sa.Integer(), nullable=True), - sa.Column('element', sa.String(length=250), nullable=False), - sa.Column('value', sa.PickleType(), nullable=True), - sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), - sa.PrimaryKeyConstraint('id') + if "metadata" not in existing_tables: + op.create_table( + "metadata", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("sim_id", sa.Integer(), nullable=True), + sa.Column("element", sa.String(length=250), nullable=False), + sa.Column("value", sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint( + ["sim_id"], + ["simulations.id"], + ), + sa.PrimaryKeyConstraint("id"), ) - op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) - op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) - + op.create_index( + op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False + ) + op.create_index( + "metadata_index", "metadata", ["sim_id", "element"], unique=True + ) + # Create simulation_input_files table if it doesn't exist - if 'simulation_input_files' not in existing_tables: - op.create_table('simulation_input_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + if "simulation_input_files" not in existing_tables: + op.create_table( + "simulation_input_files", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("file_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["file_id"], + ["files.id"], + ), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), ) - + # Create simulation_output_files table if it doesn't exist - if 'simulation_output_files' not in existing_tables: - op.create_table('simulation_output_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + if "simulation_output_files" not in existing_tables: + op.create_table( + "simulation_output_files", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("file_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["file_id"], + ["files.id"], + ), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), ) - + # Create simulation_watchers table if it doesn't exist - if 'simulation_watchers' not in existing_tables: - op.create_table('simulation_watchers', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('watcher_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), - sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) + if "simulation_watchers" not in existing_tables: + op.create_table( + "simulation_watchers", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("watcher_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), + sa.ForeignKeyConstraint( + ["watcher_id"], + ["watchers.id"], + ), ) def downgrade() -> None: """Downgrade schema.""" # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('simulation_watchers') - op.drop_table('simulation_output_files') - op.drop_table('simulation_input_files') - op.drop_index('metadata_index', table_name='metadata') - op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') - op.drop_table('metadata') - op.drop_table('watchers') - op.drop_index(op.f('ix_simulations_uuid'), table_name='simulations') - op.drop_index(op.f('ix_simulations_alias'), table_name='simulations') - op.drop_table('simulations') + op.drop_table("simulation_watchers") + op.drop_table("simulation_output_files") + op.drop_table("simulation_input_files") + op.drop_index("metadata_index", table_name="metadata") + op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") + op.drop_table("metadata") + op.drop_table("watchers") + op.drop_index(op.f("ix_simulations_uuid"), table_name="simulations") + op.drop_index(op.f("ix_simulations_alias"), table_name="simulations") + op.drop_table("simulations") # ### end Alembic commands ### From 5bad0881386d071890ba816691a94c7d754d5ff0 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Wed, 11 Feb 2026 13:15:51 +0100 Subject: [PATCH 04/24] lint --- alembic/env.py | 5 +++-- alembic/versions/c84b85ae4bed_initial_migration.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 3e64a98a..0778898a 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -1,11 +1,12 @@ -from logging.config import fileConfig import os import sys +from logging.config import fileConfig from pathlib import Path -from alembic import context from sqlalchemy import create_engine, pool +from alembic import context + config = context.config if config.config_file_name: diff --git a/alembic/versions/c84b85ae4bed_initial_migration.py b/alembic/versions/c84b85ae4bed_initial_migration.py index 24425c7d..d6e8220b 100644 --- a/alembic/versions/c84b85ae4bed_initial_migration.py +++ b/alembic/versions/c84b85ae4bed_initial_migration.py @@ -8,8 +8,9 @@ from typing import Sequence, Union -from alembic import op import sqlalchemy as sa + +from alembic import op from simdb.database.models.types import UUID, ChoiceType from simdb.notifications import Notification From 78462656d6d9a9c79f118799daf2798316424140 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Feb 2026 10:27:54 +0100 Subject: [PATCH 05/24] redo autogenerate alembic to add files --- .../21f2b1287595_create_init_tables.py | 139 ++++++++++++++++ .../c84b85ae4bed_initial_migration.py | 155 ------------------ 2 files changed, 139 insertions(+), 155 deletions(-) create mode 100644 alembic/versions/21f2b1287595_create_init_tables.py delete mode 100644 alembic/versions/c84b85ae4bed_initial_migration.py diff --git a/alembic/versions/21f2b1287595_create_init_tables.py b/alembic/versions/21f2b1287595_create_init_tables.py new file mode 100644 index 00000000..b0398f7e --- /dev/null +++ b/alembic/versions/21f2b1287595_create_init_tables.py @@ -0,0 +1,139 @@ +"""create init tables + +Revision ID: 21f2b1287595 +Revises: +Create Date: 2026-02-13 10:11:39.262884 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from simdb.database.models.types import URI, UUID, ChoiceType +from simdb.notifications import Notification + + + +# revision identifiers, used by Alembic. +revision: str = '21f2b1287595' +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +# Define notification choices +NOTIFICATION_CHOICES = { + Notification.VALIDATION: "V", + Notification.REVISION: "R", + Notification.OBSOLESCENCE: "O", + Notification.ALL: "A", +} + +def upgrade() -> None: + """Upgrade schema.""" + # Get connection to inspect existing database schema + conn = op.get_bind() + inspector = sa.inspect(conn) + existing_tables = inspector.get_table_names() + + # Create files table if it doesn't exist + if 'files' not in existing_tables: + op.create_table('files', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('uuid', UUID(), nullable=False), + sa.Column('usage', sa.String(length=250), nullable=True), + sa.Column('uri', URI(length=1024), nullable=True), + sa.Column('checksum', sa.String(length=64), nullable=True), + sa.Column('type', sa.Enum('UNKNOWN', 'UUID', 'FILE', 'IMAS', 'UDA', name='type'), nullable=True), + sa.Column('purpose', sa.String(length=250), nullable=True), + sa.Column('sensitivity', sa.String(length=20), nullable=True), + sa.Column('access', sa.String(length=20), nullable=True), + sa.Column('embargo', sa.String(length=20), nullable=True), + sa.Column('datetime', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_files_uuid'), 'files', ['uuid'], unique=True) + + # Create simulations table if it doesn't exist + if 'simulations' not in existing_tables: + op.create_table('simulations', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('uuid', UUID(), nullable=False), + sa.Column('alias', sa.String(length=250), nullable=True), + sa.Column('datetime', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) + op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) + + # Create watchers table if it doesn't exist + if 'watchers' not in existing_tables: + op.create_table('watchers', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('username', sa.String(length=250), nullable=True), + sa.Column('email', sa.String(length=1000), nullable=True), + sa.Column( + "notification", + ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), + nullable=True, + ), + sa.PrimaryKeyConstraint('id') + ) + + # Create metadata table if it doesn't exist + if 'metadata' not in existing_tables: + op.create_table('metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('sim_id', sa.Integer(), nullable=True), + sa.Column('element', sa.String(length=250), nullable=False), + sa.Column('value', sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) + op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) + + # Create simulation_input_files table if it doesn't exist + if 'simulation_input_files' not in existing_tables: + op.create_table('simulation_input_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + + # Create simulation_output_files table if it doesn't exist + if 'simulation_output_files' not in existing_tables: + op.create_table('simulation_output_files', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('file_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + ) + + # Create simulation_watchers table if it doesn't exist + if 'simulation_watchers' not in existing_tables: + op.create_table('simulation_watchers', + sa.Column('simulation_id', sa.Integer(), nullable=True), + sa.Column('watcher_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), + sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('simulation_watchers') + op.drop_table('simulation_output_files') + op.drop_table('simulation_input_files') + op.drop_index('metadata_index', table_name='metadata') + op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') + op.drop_table('metadata') + op.drop_table('watchers') + op.drop_index(op.f('ix_simulations_uuid'), table_name='simulations') + op.drop_index(op.f('ix_simulations_alias'), table_name='simulations') + op.drop_table('simulations') + op.drop_index(op.f('ix_files_uuid'), table_name='files') + op.drop_table('files') + # ### end Alembic commands ### diff --git a/alembic/versions/c84b85ae4bed_initial_migration.py b/alembic/versions/c84b85ae4bed_initial_migration.py deleted file mode 100644 index d6e8220b..00000000 --- a/alembic/versions/c84b85ae4bed_initial_migration.py +++ /dev/null @@ -1,155 +0,0 @@ -"""initial migration - -Revision ID: c84b85ae4bed -Revises: -Create Date: 2026-02-10 15:24:11.323077 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa - -from alembic import op -from simdb.database.models.types import UUID, ChoiceType -from simdb.notifications import Notification - -# revision identifiers, used by Alembic. -revision: str = "c84b85ae4bed" -down_revision: Union[str, Sequence[str], None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - -# Define notification choices -NOTIFICATION_CHOICES = { - Notification.VALIDATION: "V", - Notification.REVISION: "R", - Notification.OBSOLESCENCE: "O", - Notification.ALL: "A", -} - - -def upgrade() -> None: - """Upgrade schema.""" - # Get connection to check for existing tables - conn = op.get_bind() - inspector = sa.inspect(conn) - existing_tables = inspector.get_table_names() - # Create simulations table if it doesn't exist - if "simulations" not in existing_tables: - op.create_table( - "simulations", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("uuid", UUID(), nullable=False), - sa.Column("alias", sa.String(length=250), nullable=True), - sa.Column("datetime", sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint("id"), - ) - op.create_index( - op.f("ix_simulations_alias"), "simulations", ["alias"], unique=True - ) - op.create_index( - op.f("ix_simulations_uuid"), "simulations", ["uuid"], unique=True - ) - - # Create watchers table if it doesn't exist - if "watchers" not in existing_tables: - op.create_table( - "watchers", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("username", sa.String(length=250), nullable=True), - sa.Column("email", sa.String(length=1000), nullable=True), - sa.Column( - "notification", - ChoiceType( - choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification - ), - nullable=True, - ), - sa.PrimaryKeyConstraint("id"), - ) - - # Create metadata table if it doesn't exist - if "metadata" not in existing_tables: - op.create_table( - "metadata", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("sim_id", sa.Integer(), nullable=True), - sa.Column("element", sa.String(length=250), nullable=False), - sa.Column("value", sa.PickleType(), nullable=True), - sa.ForeignKeyConstraint( - ["sim_id"], - ["simulations.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.create_index( - op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False - ) - op.create_index( - "metadata_index", "metadata", ["sim_id", "element"], unique=True - ) - - # Create simulation_input_files table if it doesn't exist - if "simulation_input_files" not in existing_tables: - op.create_table( - "simulation_input_files", - sa.Column("simulation_id", sa.Integer(), nullable=True), - sa.Column("file_id", sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ["file_id"], - ["files.id"], - ), - sa.ForeignKeyConstraint( - ["simulation_id"], - ["simulations.id"], - ), - ) - - # Create simulation_output_files table if it doesn't exist - if "simulation_output_files" not in existing_tables: - op.create_table( - "simulation_output_files", - sa.Column("simulation_id", sa.Integer(), nullable=True), - sa.Column("file_id", sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ["file_id"], - ["files.id"], - ), - sa.ForeignKeyConstraint( - ["simulation_id"], - ["simulations.id"], - ), - ) - - # Create simulation_watchers table if it doesn't exist - if "simulation_watchers" not in existing_tables: - op.create_table( - "simulation_watchers", - sa.Column("simulation_id", sa.Integer(), nullable=True), - sa.Column("watcher_id", sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ["simulation_id"], - ["simulations.id"], - ), - sa.ForeignKeyConstraint( - ["watcher_id"], - ["watchers.id"], - ), - ) - - -def downgrade() -> None: - """Downgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table("simulation_watchers") - op.drop_table("simulation_output_files") - op.drop_table("simulation_input_files") - op.drop_index("metadata_index", table_name="metadata") - op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") - op.drop_table("metadata") - op.drop_table("watchers") - op.drop_index(op.f("ix_simulations_uuid"), table_name="simulations") - op.drop_index(op.f("ix_simulations_alias"), table_name="simulations") - op.drop_table("simulations") - # ### end Alembic commands ### From 3712954c702c30bdf4184ad724a5cdf7e3998be9 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Feb 2026 10:28:38 +0100 Subject: [PATCH 06/24] lint --- .../21f2b1287595_create_init_tables.py | 195 +++++++++++------- 1 file changed, 119 insertions(+), 76 deletions(-) diff --git a/alembic/versions/21f2b1287595_create_init_tables.py b/alembic/versions/21f2b1287595_create_init_tables.py index b0398f7e..0cf80b97 100644 --- a/alembic/versions/21f2b1287595_create_init_tables.py +++ b/alembic/versions/21f2b1287595_create_init_tables.py @@ -1,10 +1,11 @@ """create init tables Revision ID: 21f2b1287595 -Revises: +Revises: Create Date: 2026-02-13 10:11:39.262884 """ + from typing import Sequence, Union from alembic import op @@ -13,9 +14,8 @@ from simdb.notifications import Notification - # revision identifiers, used by Alembic. -revision: str = '21f2b1287595' +revision: str = "21f2b1287595" down_revision: Union[str, Sequence[str], None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -28,95 +28,138 @@ Notification.ALL: "A", } + def upgrade() -> None: """Upgrade schema.""" # Get connection to inspect existing database schema conn = op.get_bind() inspector = sa.inspect(conn) existing_tables = inspector.get_table_names() - + # Create files table if it doesn't exist - if 'files' not in existing_tables: - op.create_table('files', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('uuid', UUID(), nullable=False), - sa.Column('usage', sa.String(length=250), nullable=True), - sa.Column('uri', URI(length=1024), nullable=True), - sa.Column('checksum', sa.String(length=64), nullable=True), - sa.Column('type', sa.Enum('UNKNOWN', 'UUID', 'FILE', 'IMAS', 'UDA', name='type'), nullable=True), - sa.Column('purpose', sa.String(length=250), nullable=True), - sa.Column('sensitivity', sa.String(length=20), nullable=True), - sa.Column('access', sa.String(length=20), nullable=True), - sa.Column('embargo', sa.String(length=20), nullable=True), - sa.Column('datetime', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id') + if "files" not in existing_tables: + op.create_table( + "files", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("uuid", UUID(), nullable=False), + sa.Column("usage", sa.String(length=250), nullable=True), + sa.Column("uri", URI(length=1024), nullable=True), + sa.Column("checksum", sa.String(length=64), nullable=True), + sa.Column( + "type", + sa.Enum("UNKNOWN", "UUID", "FILE", "IMAS", "UDA", name="type"), + nullable=True, + ), + sa.Column("purpose", sa.String(length=250), nullable=True), + sa.Column("sensitivity", sa.String(length=20), nullable=True), + sa.Column("access", sa.String(length=20), nullable=True), + sa.Column("embargo", sa.String(length=20), nullable=True), + sa.Column("datetime", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), ) - op.create_index(op.f('ix_files_uuid'), 'files', ['uuid'], unique=True) + op.create_index(op.f("ix_files_uuid"), "files", ["uuid"], unique=True) # Create simulations table if it doesn't exist - if 'simulations' not in existing_tables: - op.create_table('simulations', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('uuid', UUID(), nullable=False), - sa.Column('alias', sa.String(length=250), nullable=True), - sa.Column('datetime', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id') + if "simulations" not in existing_tables: + op.create_table( + "simulations", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("uuid", UUID(), nullable=False), + sa.Column("alias", sa.String(length=250), nullable=True), + sa.Column("datetime", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_simulations_alias"), "simulations", ["alias"], unique=True + ) + op.create_index( + op.f("ix_simulations_uuid"), "simulations", ["uuid"], unique=True ) - op.create_index(op.f('ix_simulations_alias'), 'simulations', ['alias'], unique=True) - op.create_index(op.f('ix_simulations_uuid'), 'simulations', ['uuid'], unique=True) # Create watchers table if it doesn't exist - if 'watchers' not in existing_tables: - op.create_table('watchers', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('username', sa.String(length=250), nullable=True), - sa.Column('email', sa.String(length=1000), nullable=True), + if "watchers" not in existing_tables: + op.create_table( + "watchers", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("username", sa.String(length=250), nullable=True), + sa.Column("email", sa.String(length=1000), nullable=True), sa.Column( "notification", - ChoiceType(choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification), + ChoiceType( + choices=NOTIFICATION_CHOICES, length=1, enum_type=Notification + ), nullable=True, ), - sa.PrimaryKeyConstraint('id') + sa.PrimaryKeyConstraint("id"), ) # Create metadata table if it doesn't exist - if 'metadata' not in existing_tables: - op.create_table('metadata', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('sim_id', sa.Integer(), nullable=True), - sa.Column('element', sa.String(length=250), nullable=False), - sa.Column('value', sa.PickleType(), nullable=True), - sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), - sa.PrimaryKeyConstraint('id') + if "metadata" not in existing_tables: + op.create_table( + "metadata", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("sim_id", sa.Integer(), nullable=True), + sa.Column("element", sa.String(length=250), nullable=False), + sa.Column("value", sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint( + ["sim_id"], + ["simulations.id"], + ), + sa.PrimaryKeyConstraint("id"), ) - op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) - op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) - + op.create_index( + op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False + ) + op.create_index( + "metadata_index", "metadata", ["sim_id", "element"], unique=True + ) + # Create simulation_input_files table if it doesn't exist - if 'simulation_input_files' not in existing_tables: - op.create_table('simulation_input_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + if "simulation_input_files" not in existing_tables: + op.create_table( + "simulation_input_files", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("file_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["file_id"], + ["files.id"], + ), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), ) # Create simulation_output_files table if it doesn't exist - if 'simulation_output_files' not in existing_tables: - op.create_table('simulation_output_files', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('file_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['file_id'], ['files.id'], ), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ) + if "simulation_output_files" not in existing_tables: + op.create_table( + "simulation_output_files", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("file_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["file_id"], + ["files.id"], + ), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), ) # Create simulation_watchers table if it doesn't exist - if 'simulation_watchers' not in existing_tables: - op.create_table('simulation_watchers', - sa.Column('simulation_id', sa.Integer(), nullable=True), - sa.Column('watcher_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['simulation_id'], ['simulations.id'], ), - sa.ForeignKeyConstraint(['watcher_id'], ['watchers.id'], ) + if "simulation_watchers" not in existing_tables: + op.create_table( + "simulation_watchers", + sa.Column("simulation_id", sa.Integer(), nullable=True), + sa.Column("watcher_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["simulation_id"], + ["simulations.id"], + ), + sa.ForeignKeyConstraint( + ["watcher_id"], + ["watchers.id"], + ), ) # ### end Alembic commands ### @@ -124,16 +167,16 @@ def upgrade() -> None: def downgrade() -> None: """Downgrade schema.""" # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('simulation_watchers') - op.drop_table('simulation_output_files') - op.drop_table('simulation_input_files') - op.drop_index('metadata_index', table_name='metadata') - op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') - op.drop_table('metadata') - op.drop_table('watchers') - op.drop_index(op.f('ix_simulations_uuid'), table_name='simulations') - op.drop_index(op.f('ix_simulations_alias'), table_name='simulations') - op.drop_table('simulations') - op.drop_index(op.f('ix_files_uuid'), table_name='files') - op.drop_table('files') + op.drop_table("simulation_watchers") + op.drop_table("simulation_output_files") + op.drop_table("simulation_input_files") + op.drop_index("metadata_index", table_name="metadata") + op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") + op.drop_table("metadata") + op.drop_table("watchers") + op.drop_index(op.f("ix_simulations_uuid"), table_name="simulations") + op.drop_index(op.f("ix_simulations_alias"), table_name="simulations") + op.drop_table("simulations") + op.drop_index(op.f("ix_files_uuid"), table_name="files") + op.drop_table("files") # ### end Alembic commands ### From dbc96bccd46fb8fd892acc764568ae4cd6b0ebfa Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Feb 2026 10:35:43 +0100 Subject: [PATCH 07/24] let Base.metadata handle all models --- alembic/env.py | 7 ------- alembic/versions/21f2b1287595_create_init_tables.py | 4 ++-- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 0778898a..36552eca 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -16,13 +16,6 @@ sys.path.insert(0, str(SRC_PATH)) from simdb.database.models import Base # noqa -from simdb.database.models import ( # noqa - file, - metadata, - simulation, - watcher, -) -from simdb.database.models import types # noqa target_metadata = Base.metadata diff --git a/alembic/versions/21f2b1287595_create_init_tables.py b/alembic/versions/21f2b1287595_create_init_tables.py index 0cf80b97..68b8aa0c 100644 --- a/alembic/versions/21f2b1287595_create_init_tables.py +++ b/alembic/versions/21f2b1287595_create_init_tables.py @@ -8,12 +8,12 @@ from typing import Sequence, Union -from alembic import op import sqlalchemy as sa + +from alembic import op from simdb.database.models.types import URI, UUID, ChoiceType from simdb.notifications import Notification - # revision identifiers, used by Alembic. revision: str = "21f2b1287595" down_revision: Union[str, Sequence[str], None] = None From 1d5b15bb06b0970f1dafc38b0a8887ccda3fb5f3 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 10:53:21 +0100 Subject: [PATCH 08/24] switch from metadata field to json column --- ...3aa2429_convert_metadata_to_json_column.py | 130 ++++++ src/simdb/database/database.py | 412 ++++++++---------- src/simdb/database/models/simulation.py | 147 ++++--- src/simdb/remote/apis/v1/simulations.py | 6 +- src/simdb/remote/apis/v1_1/simulations.py | 10 +- src/simdb/remote/apis/v1_2/simulations.py | 3 +- 6 files changed, 418 insertions(+), 290 deletions(-) create mode 100644 alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py new file mode 100644 index 00000000..f8891d2a --- /dev/null +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -0,0 +1,130 @@ +"""convert_metadata_to_json_column + +Revision ID: 28bee3aa2429 +Revises: 9e9a4a7cd639 +Create Date: 2026-02-26 17:01:30.925750 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from sqlalchemy.dialects import postgresql + +revision: str = '28bee3aa2429' +down_revision: Union[str, Sequence[str], None] = '9e9a4a7cd639' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + conn = op.get_bind() + + # Add metadata JSON column to simulations table + # Use JSON type for PostgreSQL, Text for SQLite (will store JSON as text) + if conn.dialect.name == 'postgresql': + op.add_column('simulations', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True)) + else: + op.add_column('simulations', sa.Column('metadata', sa.Text(), nullable=True)) + + # Migrate existing metadata from metadata table to JSON column + # First, we need to aggregate metadata by simulation + if conn.dialect.name == 'postgresql': + # PostgreSQL: Use json_object_agg + migration_query = text(""" + UPDATE simulations + SET metadata = subq.meta_json + FROM ( + SELECT sim_id, json_object_agg(element, value) as meta_json + FROM metadata + GROUP BY sim_id + ) AS subq + WHERE simulations.id = subq.sim_id + """) + conn.execute(migration_query) + else: + # SQLite: Build JSON manually using group_concat + # This is more complex, we'll handle it per simulation + result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) + sim_ids = [row[0] for row in result] + + for sim_id in sim_ids: + # Get all metadata for this simulation + meta_rows = conn.execute( + text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), + {"sim_id": sim_id} + ) + + # Build JSON object + import json + import pickle + meta_dict = {} + for element, value in meta_rows: + # Value is stored as pickle, need to deserialize + if value is not None: + try: + meta_dict[element] = pickle.loads(value) if isinstance(value, bytes) else value + except: + meta_dict[element] = value + else: + meta_dict[element] = None + + conn.execute( + text("UPDATE simulations SET metadata = :metadata WHERE id = :sim_id"), + {"metadata": json.dumps(meta_dict), "sim_id": sim_id} + ) + + op.drop_index('metadata_index', table_name='metadata') + op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') + op.drop_table('metadata') + + +def downgrade() -> None: + """Downgrade schema.""" + conn = op.get_bind() + + # Recreate metadata table + op.create_table( + 'metadata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('sim_id', sa.Integer(), nullable=True), + sa.Column('element', sa.String(length=250), nullable=False), + sa.Column('value', sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) + op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) + + # Migrate data back from JSON column to metadata table + if conn.dialect.name == 'postgresql': + migration_query = text(""" + INSERT INTO metadata (sim_id, element, value) + SELECT s.id, kv.key, kv.value::text + FROM simulations s, json_each_text(s.metadata::json) kv + WHERE s.metadata IS NOT NULL + """) + conn.execute(migration_query) + else: + # SQLite: Parse JSON and insert rows + import json + import pickle + + result = conn.execute(text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL")) + for sim_id, metadata_json in result: + if metadata_json: + try: + meta_dict = json.loads(metadata_json) + for element, value in meta_dict.items(): + # Pickle the value for storage + pickled_value = pickle.dumps(value, 0) + conn.execute( + text("INSERT INTO metadata (sim_id, element, value) VALUES (:sim_id, :element, :value)"), + {"sim_id": sim_id, "element": element, "value": pickled_value} + ) + except: + pass + + op.drop_column('simulations', 'metadata') diff --git a/src/simdb/database/database.py b/src/simdb/database/database.py index 37ba0071..ac61725b 100644 --- a/src/simdb/database/database.py +++ b/src/simdb/database/database.py @@ -1,4 +1,5 @@ import contextlib +import json import sys import uuid from datetime import datetime @@ -19,7 +20,6 @@ from .models import Base from .models.file import File -from .models.metadata import MetaData from .models.simulation import Simulation @@ -176,38 +176,82 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def _get_simulation_data(self, limit, query, meta_keys, page) -> Tuple[int, List]: - if limit: - limit = limit * len(meta_keys) if meta_keys else limit - limit_query = query.limit(limit).offset((page - 1) * limit) - else: - limit_query = self.get_simulation_data(query) - data = {} - for row in limit_query: - data.setdefault( - row.simulation.uuid, - { - "alias": row.simulation.alias, - "uuid": row.simulation.uuid, - "datetime": row.simulation.datetime.isoformat(), - "metadata": [], - }, - ) + def _get_simulation_data(self, query, meta_keys, limit, page, sort_by="", sort_asc=False) -> Tuple[int, List]: + """ + Build simulation data from query results with JSON metadata. + + :param query: SQLAlchemy query object + :param meta_keys: List of metadata keys to include + :param limit: Maximum number of results per page + :param page: Page number (1-indexed) + :param sort_by: Field name to sort by (can be alias, uuid, datetime, or a metadata key) + :param sort_asc: Sort in ascending order if True, descending if False + :return: Tuple of (total_count, list of simulation dicts) + """ + total_count = query.count() + + all_rows = query.all() + + results = [] + for row in all_rows: + sim_data = { + "alias": row.alias, + "uuid": row.uuid, + "datetime": row.datetime.isoformat(), + } + + metadata_json = row._metadata + if metadata_json: + if isinstance(metadata_json, str): + try: + meta_dict = json.loads(metadata_json) + except (json.JSONDecodeError, TypeError): + meta_dict = {} + else: + meta_dict = metadata_json if isinstance(metadata_json, dict) else {} + else: + meta_dict = {} + + sim_data["_meta_dict"] = meta_dict + if meta_keys: - data[row.simulation.uuid]["metadata"].append( - {"element": row.metadata.element, "value": row.metadata.value} - ) - if meta_keys: - return query.count() / len(meta_keys), list(data.values()) - else: - return query.count(), list(data.values()) + sim_data["metadata"] = [ + {"element": k, "value": v} + for k, v in meta_dict.items() + if k in meta_keys + ] + + results.append(sim_data) + + if sort_by: + def get_sort_key(item): + if sort_by in ("alias", "uuid", "datetime"): + val = item.get(sort_by, "") + else: + val = item.get("_meta_dict", {}).get(sort_by, "") + # Handle None values - put them at the end + if val is None: + return ("", "") if sort_asc else ("~", "~") + # Convert to string for consistent sorting + return str(val).lower() if isinstance(val, str) else str(val) + + results.sort(key=get_sort_key, reverse=not sort_asc) + + for sim_data in results: + sim_data.pop("_meta_dict", None) + + if limit: + start_idx = (page - 1) * limit + end_idx = start_idx + limit + results = results[start_idx:end_idx] + + return total_count, results def _find_simulation(self, sim_ref: str) -> "Simulation": try: sim_uuid = uuid.UUID(sim_ref) simulation = ( self.session.query(Simulation) - .options(joinedload(Simulation.meta)) .filter_by(uuid=sim_uuid) .one_or_none() ) @@ -215,7 +259,6 @@ def _find_simulation(self, sim_ref: str) -> "Simulation": try: simulation = ( self.session.query(Simulation) - .options(joinedload(Simulation.meta)) .filter( sql_or( sql_cast(Simulation.uuid, Text).startswith(sim_ref), @@ -258,22 +301,10 @@ def list_simulations( :return: A list of Simulations. """ - - if meta_keys: - query = ( - self.session.query(Simulation) - .options(joinedload(Simulation.meta)) - .outerjoin(Simulation.meta) - .filter(MetaData.element.in_(meta_keys)) - ) - if limit: - query = query.limit(limit) - return query.all() - else: - query = self.session.query(Simulation) - if limit: - query = query.limit(limit) - return query.all() + query = self.session.query(Simulation) + if limit: + query = query.limit(limit) + return query.all() def list_simulation_data( self, @@ -286,62 +317,11 @@ def list_simulation_data( """ Return a list of all the simulations stored in the database. - :return: A list of Simulations. + :return: A tuple of (total_count, list of simulation data dicts). """ - - sort_query = None - if sort_by: - sort_dir = asc if sort_asc else desc - sort_query = ( - self.session.query( - Simulation.id, - func.row_number() - .over(order_by=sort_dir(MetaData.value)) - .label("row_num"), - ) - .join(Simulation.meta) - .filter(MetaData.element == sort_by) - .subquery() - ) - - if meta_keys: - s_b = Bundle( - "simulation", Simulation.alias, Simulation.uuid, Simulation.datetime - ) - m_b = Bundle("metadata", MetaData.element, MetaData.value) - query = self.session.query(s_b, m_b).outerjoin(Simulation.meta) - - names_filters = [] - for name in meta_keys: - if name in ("alias", "uuid"): - continue - names_filters.append(m_b.c.element.ilike(name)) # type: ignore[union-attr] - if names_filters: - query = query.filter(or_(*names_filters)) - - if sort_query is not None: - query = query.join( - sort_query, Simulation.id == sort_query.c.id - ).order_by(sort_query.c.row_num) - - return self._get_simulation_data(limit, query, meta_keys, page) - else: - query = self.session.query( - Simulation.alias, Simulation.uuid, Simulation.datetime - ) - - if sort_query is not None: - query = query.join( - sort_query, Simulation.id == sort_query.c.id - ).order_by(sort_query.c.row_num) - - limit_query = ( - query.limit(limit).offset((page - 1) * limit) if limit else query - ) - return query.count(), [ - {"alias": alias, "uuid": uuid, "datetime": datetime.isoformat()} - for alias, uuid, datetime in limit_query - ] + query = self.session.query(Simulation) + + return self._get_simulation_data(query, meta_keys, limit, page, sort_by, sort_asc) def get_simulation_data(self, query): limit_query = query @@ -372,98 +352,85 @@ def delete_simulation(self, sim_ref: str) -> "Simulation": self.session.commit() return simulation - def _get_metadata( + def _get_sim_ids_from_json( self, constraints: List[Tuple[str, str, "QueryType"]] - ) -> Iterable: - m_b = Bundle("metadata", MetaData.element, MetaData.value) - s_b = Bundle("simulation", Simulation.id, Simulation.alias, Simulation.uuid) - query = self.session.query(m_b, s_b).join(Simulation) + ) -> Iterable[int]: + query = self.session.query(Simulation.id, Simulation._metadata, + Simulation.alias, Simulation.uuid, Simulation.datetime) + + sim_id_sets = {} for name, value, query_type in constraints: - date_time = datetime.now() - if name == "creation_date": - date_time = datetime.strptime( - value.replace("_", ":"), "%Y-%m-%d %H:%M:%S" - ) - if query == QueryType.NONE: - pass - elif query_type == QueryType.EQ: - if name == "alias": + sim_id_sets[(name, value, query_type)] = set() + + for name, value, query_type in constraints: + if name == "alias": + if query_type == QueryType.EQ: query = query.filter(func.lower(Simulation.alias) == value.lower()) - elif name == "uuid": - query = query.filter(Simulation.uuid == uuid.UUID(value)) - elif name == "creation_date": - query = query.filter(Simulation.datetime == date_time) - elif query_type == QueryType.IN: - if name == "alias": + elif query_type == QueryType.IN: query = query.filter(Simulation.alias.ilike(f"%{value}%")) - elif name == "uuid": + elif query_type == QueryType.NI: + query = query.filter(Simulation.alias.notilike(f"%{value}%")) + elif query_type == QueryType.NE: + query = query.filter(func.lower(Simulation.alias) != value.lower()) + elif name == "uuid": + if query_type == QueryType.EQ: + query = query.filter(Simulation.uuid == uuid.UUID(value)) + elif query_type == QueryType.IN: query = query.filter( - func.REPLACE(cast(Simulation.uuid, String), "-", "").ilike( + func.REPLACE(sql_cast(Simulation.uuid, String), "-", "").ilike( "%{}%".format(value.replace("-", "")) ) ) - elif query_type == QueryType.NI: - if name == "alias": - query = query.filter(Simulation.alias.notilike(f"%{value}%")) - elif name == "uuid": + elif query_type == QueryType.NI: query = query.filter( - func.REPLACE(cast(Simulation.uuid, String), "-", "").notilike( + func.REPLACE(sql_cast(Simulation.uuid, String), "-", "").notilike( "%{}%".format(value.replace("-", "")) ) ) - elif query_type == QueryType.GT: - if name == "creation_date": + elif query_type == QueryType.NE: + query = query.filter(Simulation.uuid != uuid.UUID(value)) + elif name == "creation_date": + date_time = datetime.strptime(value.replace("_", ":"), "%Y-%m-%d %H:%M:%S") + if query_type == QueryType.EQ: + query = query.filter(Simulation.datetime == date_time) + elif query_type == QueryType.GT: query = query.filter(Simulation.datetime > date_time) - elif query_type == QueryType.GE: - if name == "creation_date": + elif query_type == QueryType.GE: query = query.filter(Simulation.datetime >= date_time) - elif query_type == QueryType.LT: - if name == "creation_date": + elif query_type == QueryType.LT: query = query.filter(Simulation.datetime < date_time) - elif query_type == QueryType.LE: - if name == "creation_date": + elif query_type == QueryType.LE: query = query.filter(Simulation.datetime <= date_time) - elif query_type == QueryType.NE: - if name == "creation_date": + elif query_type == QueryType.NE: query = query.filter(Simulation.datetime != date_time) - if name == "alias": - query = query.filter(func.lower(Simulation.alias) != value.lower()) - if name == "uuid": - query = query.filter(Simulation.uuid != uuid.UUID(value)) - elif name in ("uuid", "alias"): - raise ValueError(f"Invalid query type {query_type} for alias or uuid.") - names_filters = [] - for name, _, _ in constraints: - if name in ("alias", "uuid", "creation_date"): - continue - names_filters.append(MetaData.element.ilike(name)) - if names_filters: - query = query.filter(or_(*names_filters)) - - return query - - def _get_sim_ids( - self, constraints: List[Tuple[str, str, "QueryType"]] - ) -> Iterable[int]: - rows = self._get_metadata(constraints) - - sim_id_sets = {} - for name, value, query_type in constraints: - sim_id_sets[(name, value, query_type)] = set() - + + # Execute query and filter on JSON metadata in Python + rows = query.all() + for row in rows: + if row._metadata: + if isinstance(row._metadata, str): + try: + meta_dict = json.loads(row._metadata) + except (json.JSONDecodeError, TypeError): + meta_dict = {} + else: + meta_dict = row._metadata if isinstance(row._metadata, dict) else {} + else: + meta_dict = {} + for name, value, query_type in constraints: if name in ("alias", "uuid", "creation_date"): - sim_id_sets[(name, value, query_type)].add(row.simulation.id) - if row.metadata.element == name and ( - query_type == QueryType.EXIST - or query_compare(query_type, name, row.metadata.value, value) - ): - sim_id_sets[(name, value, query_type)].add(row.simulation.id) - + sim_id_sets[(name, value, query_type)].add(row.id) + elif name in meta_dict: + if query_type == QueryType.EXIST or query_compare( + query_type, name, meta_dict[name], value + ): + sim_id_sets[(name, value, query_type)].add(row.id) + if sim_id_sets: return set.intersection(*sim_id_sets.values()) - + return [] def query_meta( @@ -475,15 +442,11 @@ def query_meta( :return: """ - sim_ids = self._get_sim_ids(constraints) + sim_ids = self._get_sim_ids_from_json(constraints) if not sim_ids: return [] - query = ( - self.session.query(Simulation) - .options(joinedload(Simulation.meta)) - .filter(Simulation.id.in_(sim_ids)) - ) + query = self.session.query(Simulation).filter(Simulation.id.in_(sim_ids)) return query.all() def query_meta_data( @@ -501,49 +464,13 @@ def query_meta_data( :return: """ - sim_ids = self._get_sim_ids(constraints) + sim_ids = self._get_sim_ids_from_json(constraints) if not sim_ids: return 0, [] - sort_query = None - if sort_by: - sort_dir = asc if sort_asc else desc - sort_query = ( - self.session.query( - Simulation.id, - func.row_number() - .over(order_by=sort_dir(MetaData.value)) - .label("row_num"), - ) - .join(Simulation.meta) - .filter(MetaData.element == sort_by) - .subquery() - ) - - s_b = Bundle( - "simulation", - Simulation.id, - Simulation.alias, - Simulation.uuid, - Simulation.datetime, - ) - m_b = Bundle("metadata", MetaData.element, MetaData.value) - if meta_keys: - query = ( - self.session.query(s_b, m_b) - .outerjoin(Simulation.meta) - .filter(s_b.c.id.in_(sim_ids)) # type: ignore[union-attr] - ) - query = query.filter(m_b.c.element.in_(meta_keys)) # type: ignore[union-attr] - else: - query = self.session.query(s_b).filter(s_b.c.id.in_(sim_ids)) # type: ignore[union-attr] - - if sort_query is not None: - query = query.join(sort_query, Simulation.id == sort_query.c.id).order_by( - sort_query.c.row_num - ) - - return self._get_simulation_data(limit, query, meta_keys, page) + query = self.session.query(Simulation).filter(Simulation.id.in_(sim_ids)) + + return self._get_simulation_data(query, meta_keys, limit, page, sort_by, sort_asc) def get_simulation(self, sim_ref: str) -> "Simulation": """ @@ -611,11 +538,11 @@ def get_metadata(self, sim_ref: str, name: str) -> List[str]: :param sim_ref: the simulation identifier :param name: the metadata key - :return: The matching MetaData. + :return: The matching metadata values. """ simulation = self._find_simulation(sim_ref) self.session.commit() - return [m.value for m in simulation.meta if m.element == name] + return simulation.find_meta(name) def add_watcher(self, sim_ref: str, watcher: "Watcher"): sim = self._find_simulation(sim_ref) @@ -635,28 +562,51 @@ def list_watchers(self, sim_ref: str) -> List["Watcher"]: return self._find_simulation(sim_ref).watchers def list_metadata_keys(self) -> List[dict]: - if self.engine.dialect.name == "postgresql": - query = self.session.query(MetaData.element, MetaData.value).distinct( - MetaData.element - ) - else: - query = self.session.query(MetaData.element, MetaData.value).group_by( - MetaData.element - ) - return [{"name": row[0], "type": type(row[1]).__name__} for row in query.all()] + simulations = self.session.query(Simulation._metadata).all() + + keys_dict = {} + for (metadata_json,) in simulations: + if metadata_json: + if isinstance(metadata_json, str): + try: + meta_dict = json.loads(metadata_json) + except (json.JSONDecodeError, TypeError): + continue + else: + meta_dict = metadata_json if isinstance(metadata_json, dict) else {} + + for key, value in meta_dict.items(): + if key not in keys_dict: + keys_dict[key] = value + + return [{"name": k, "type": type(v).__name__} for k, v in keys_dict.items()] def list_metadata_values(self, name: str) -> List[str]: if name == "alias": query = self.session.query(Simulation.alias).filter( Simulation.alias is not None ) + data = [row[0] for row in query.all()] else: - query = ( - self.session.query(MetaData.value) - .filter(MetaData.element == name) - .distinct() - ) - data = [row[0] for row in query.all()] + simulations = self.session.query(Simulation._metadata).all() + values_set = set() + + for (metadata_json,) in simulations: + if metadata_json: + if isinstance(metadata_json, str): + try: + meta_dict = json.loads(metadata_json) + except (json.JSONDecodeError, TypeError): + continue + else: + meta_dict = metadata_json if isinstance(metadata_json, dict) else {} + + if name in meta_dict: + val = meta_dict[name] + values_set.add(str(val) if val is not None else None) + + data = list(values_set) + try: return sorted(data) except TypeError: diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 1ee21ab6..abbc6fa1 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -1,4 +1,5 @@ import itertools +import json import sys import uuid from collections import defaultdict @@ -14,6 +15,7 @@ from sqlalchemy import Column, ForeignKey, Table from sqlalchemy import types as sql_types +from sqlalchemy.dialects import postgresql from sqlalchemy.orm import relationship if "sphinx" in sys.modules: @@ -41,7 +43,6 @@ from .base import Base from .file import File -from .metadata import MetaData from .types import UUID from .utils import checked_get, flatten_dict, unflatten_dict from .watcher import Watcher @@ -80,6 +81,17 @@ def _update_legacy_uri(data_object: DataObject): return URI(f"imas:{backend}?path={path}") +class MetaDataWrapper: + """Temporary wrapper class to provide backwards compatibility with MetaData interface.""" + + def __init__(self, element: str, value: Any): + self.element = element + self.value = value + + def data(self, recurse: bool = False) -> Dict[str, Any]: + return {"element": self.element, "value": self.value} + + @inherit_docstrings class Simulation(Base): """ @@ -99,19 +111,49 @@ class Status(Enum): uuid = Column(UUID, nullable=False, unique=True, index=True) alias = Column(sql_types.String(250), nullable=True, unique=True, index=True) datetime = Column(sql_types.DateTime, nullable=False) + _metadata = Column( + "metadata", + postgresql.JSON(astext_type=sql_types.Text()).with_variant(sql_types.Text(), "sqlite"), + nullable=True, + default=dict + ) inputs: List["File"] = relationship( "File", secondary=simulation_input_files, backref="input_for" ) outputs: List["File"] = relationship( "File", secondary=simulation_output_files, backref="output_of" ) - meta: List["MetaData"] = relationship( - "MetaData", lazy="raise", cascade="all, delete-orphan" - ) watchers: List["Watcher"] = relationship( "Watcher", secondary=simulation_watchers, lazy="dynamic" ) + @property + def meta(self) -> List[MetaDataWrapper]: + """ + Property to provide backwards compatibility. + Returns a list of MetaDataWrapper objects from the JSON metadata. + """ + meta_dict = self._get_metadata_dict() + return [MetaDataWrapper(k, v) for k, v in meta_dict.items()] + + @meta.setter + def meta(self, value: List): + """Setter for backwards compatibility - not typically used.""" + pass + + def _get_metadata_dict(self) -> Dict[str, Any]: + if self._metadata is None: + return {} + if isinstance(self._metadata, str): + try: + return json.loads(self._metadata) + except (json.JSONDecodeError, TypeError): + return {} + return self._metadata if isinstance(self._metadata, dict) else {} + + def _set_metadata_dict(self, meta_dict: Dict[str, Any]) -> None: + self._metadata = meta_dict + def __init__( self, manifest: Union[Manifest, None], config: Optional[Config] = None ) -> None: @@ -123,14 +165,16 @@ def __init__( """ if manifest is None: + self._metadata = {} return self.uuid = uuid.uuid1() self.datetime = datetime.now() + self._metadata = {} # For legacy simulation import responsible_name is from manifest else it will be # the user.email if manifest.responsible_name: - self.meta.append(MetaData("uploaded_by", manifest.responsible_name)) + self.set_meta("uploaded_by", manifest.responsible_name) self.user = getuser() @@ -160,9 +204,7 @@ def __init__( self.inputs.append(file) if all_input_idss: - self.meta.append( - MetaData("input_ids", "[{}]".format(", ".join(all_input_idss))) - ) + self.set_meta("input_ids", "[{}]".format(", ".join(all_input_idss))) all_output_idss = [] @@ -184,7 +226,7 @@ def __init__( flatten_dict(flattened_meta, meta) for key, value in flattened_meta.items(): - self.meta.append(MetaData(key, value)) + self.set_meta(key, value) file = File(output.type, output.uri, all_output_idss, config=config) if output.type == DataObject.Type.IMAS and "path" not in output.uri.query: @@ -193,7 +235,7 @@ def __init__( self.outputs.append(file) if all_output_idss: - self.meta.append(MetaData("ids", "[{}]".format(", ".join(all_output_idss)))) + self.set_meta("ids", "[{}]".format(", ".join(all_output_idss))) flattened_dict: Dict[str, str] = {} flatten_dict(flattened_dict, manifest.metadata) @@ -210,9 +252,7 @@ def __init__( def status(self) -> Optional["Simulation.Status"]: result = self.find_meta("status") if result: - value = ( - result[0].value if result[0].value != "invalidated" else "not validated" - ) + value = result[0] if result[0] != "invalidated" else "not validated" return Simulation.Status(value) return None @@ -229,25 +269,27 @@ def __str__(self): getattr(self, name), ) result += "metadata:\n" - for meta in self.meta: + meta_dict = self._get_metadata_dict() + for element, value in meta_dict.items(): if ( - isinstance(meta.value, Iterable) - and not isinstance(meta.value, np.ndarray) - and "\n" in meta.value + isinstance(value, Iterable) + and not isinstance(value, np.ndarray) + and isinstance(value, str) + and "\n" in value ): first_line = True - for line in meta.value.split("\n"): + for line in value.split("\n"): if first_line: - result += f" {meta.element}: {line}\n" + result += f" {element}: {line}\n" elif line: - indent = " " * (len(meta.element) + 2) + indent = " " * (len(element) + 2) result += f" {indent}{line}" first_line = False - elif isinstance(meta.value, np.ndarray): - string = np.array2string(meta.value, threshold=10) - result += f" {meta.element}: {string}\n" + elif isinstance(value, np.ndarray): + string = np.array2string(value, threshold=10) + result += f" {element}: {string}\n" else: - result += f" {meta.element}: {meta.value}\n" + result += f" {element}: {value}\n" result += "inputs:\n" for file in self.inputs: result += f"{file}\n" @@ -256,38 +298,32 @@ def __str__(self): result += f"{file}\n" return result - def find_meta(self, name: str) -> List["MetaData"]: - return [m for m in self.meta if m.element == name] + def find_meta(self, name: str) -> List[Any]: + meta_dict = self._get_metadata_dict() + if name in meta_dict: + return [meta_dict[name]] + return [] def remove_meta(self, name: str) -> None: - self.meta = [m for m in self.meta if m.element != name] + meta_dict = self._get_metadata_dict() + if name in meta_dict: + del meta_dict[name] + self._set_metadata_dict(meta_dict) - def set_meta(self, name: str, value: str) -> None: - for m in self.meta: - if m.element == name: - m.value = value - break - else: - self.meta.append(MetaData(name, value)) + def set_meta(self, name: str, value: Any) -> None: + meta_dict = self._get_metadata_dict() + meta_dict[name] = value + self._set_metadata_dict(meta_dict) def validate_meta(self) -> None: """ - Check the metadata elements for duplicates, throwing and exception if found. + Check the metadata elements for duplicates, throwing an exception if found. - Duplicates should not be possible but if there is an issue causing them to arise - then at least it will be caught early rather than causing an SQL constraint - failure later. + With JSON storage, duplicates are not possible by design (dict keys are unique), + but we keep this method for backwards compatibility. """ - names = [m.element for m in self.meta] - counts = defaultdict(lambda: 0) - for name in names: - counts[name] += 1 - duplicates = [k for (k, v) in counts.items() if v > 1] - if len(duplicates) > 0: - raise ValueError( - f"Duplicate metadata elements {duplicates} found for simulation " - f"{self.uuid}" - ) + # With JSON/dict storage, duplicates are impossible + pass def file_paths(self) -> Set[Path]: def _get_path(file: File) -> Optional[Path]: @@ -330,10 +366,13 @@ def from_data(cls, data: Dict[str, Union[str, Dict, List]]) -> "Simulation": simulation.outputs = [File.from_data(el) for el in outputs] if "metadata" in data: metadata = checked_get(data, "metadata", list) + meta_dict = {} for el in metadata: if not isinstance(el, dict): raise Exception("corrupted metadata element - expected dictionary") - simulation.meta.append(MetaData.from_data(el)) + if "element" in el and "value" in el: + meta_dict[el["element"]] = el["value"] + simulation._set_metadata_dict(meta_dict) return simulation def data( @@ -347,13 +386,17 @@ def data( if recurse: data["inputs"] = [f.data(recurse=True) for f in self.inputs] data["outputs"] = [f.data(recurse=True) for f in self.outputs] - data["metadata"] = [m.data(recurse=True) for m in self.meta] + meta_dict = self._get_metadata_dict() + data["metadata"] = [{"element": k, "value": v} for k, v in meta_dict.items()] elif meta_keys: + meta_dict = self._get_metadata_dict() data["metadata"] = [ - m.data(recurse=True) for m in self.meta if m.element in meta_keys + {"element": k, "value": v} + for k, v in meta_dict.items() + if k in meta_keys ] return data def meta_dict(self) -> Dict[str, Union[Dict, Any]]: - meta = {m.element: m.value for m in self.meta} + meta = self._get_metadata_dict() return unflatten_dict(meta) diff --git a/src/simdb/remote/apis/v1/simulations.py b/src/simdb/remote/apis/v1/simulations.py index d171513d..c8ca78a9 100644 --- a/src/simdb/remote/apis/v1/simulations.py +++ b/src/simdb/remote/apis/v1/simulations.py @@ -10,7 +10,6 @@ from flask_restx import Namespace, Resource from simdb.database import DatabaseError -from simdb.database.models import metadata as models_meta from simdb.database.models import simulation as models_sim from simdb.email.server import EmailServer from simdb.query import QueryType, parse_query_arg @@ -123,6 +122,9 @@ def _build_trace(sim_id: str) -> dict: @api.route("/simulations") class SimulationList(Resource): + LIMIT_HEADER = APIConstants.LIMIT_HEADER + PAGE_HEADER = APIConstants.PAGE_HEADER + parser = api.parser() parser.add_argument( APIConstants.LIMIT_HEADER, @@ -183,7 +185,7 @@ def post(self, user: User): alias = data["simulation"]["alias"] (updated_alias, next_id) = _set_alias(alias) if updated_alias: - simulation.meta.append(models_meta.MetaData("seqid", next_id)) + simulation.set_meta("seqid", next_id) simulation.alias = updated_alias else: simulation.alias = alias diff --git a/src/simdb/remote/apis/v1_1/simulations.py b/src/simdb/remote/apis/v1_1/simulations.py index 5bc1eadd..a10a211c 100644 --- a/src/simdb/remote/apis/v1_1/simulations.py +++ b/src/simdb/remote/apis/v1_1/simulations.py @@ -10,7 +10,6 @@ from flask_restx import Namespace, Resource from simdb.database import DatabaseError -from simdb.database.models import metadata as models_meta from simdb.database.models import simulation as models_sim from simdb.email.server import EmailServer from simdb.query import QueryType, parse_query_arg @@ -126,6 +125,11 @@ def _build_trace(sim_id: str) -> dict: @api.route("/simulations") class SimulationList(Resource): + LIMIT_HEADER = APIConstants.LIMIT_HEADER + PAGE_HEADER = APIConstants.PAGE_HEADER + SORT_BY_HEADER = APIConstants.SORT_BY_HEADER + SORT_ASC_HEADER = APIConstants.SORT_ASC_HEADER + parser = api.parser() parser.add_argument( APIConstants.LIMIT_HEADER, @@ -207,13 +211,13 @@ def post(self, user: User): return error("Simulation data not provided") simulation = models_sim.Simulation.from_data(data["simulation"]) - simulation.meta.append(models_meta.MetaData("uploaded_by", user.name)) + simulation.set_meta("uploaded_by", user.name) if "alias" in data["simulation"]: alias = data["simulation"]["alias"] (updated_alias, next_id) = _set_alias(alias) if updated_alias: - simulation.meta.append(models_meta.MetaData("seqid", next_id)) + simulation.set_meta("seqid", next_id) simulation.alias = updated_alias else: simulation.alias = alias diff --git a/src/simdb/remote/apis/v1_2/simulations.py b/src/simdb/remote/apis/v1_2/simulations.py index 37c4d12e..3d28a12a 100644 --- a/src/simdb/remote/apis/v1_2/simulations.py +++ b/src/simdb/remote/apis/v1_2/simulations.py @@ -12,7 +12,6 @@ from flask_restx import Namespace, Resource from simdb.database import DatabaseError -from simdb.database.models import metadata as models_meta from simdb.database.models import simulation as models_sim from simdb.database.models import watcher as models_watcher from simdb.email.server import EmailServer @@ -320,7 +319,7 @@ def post(self, user: User): if alias is not None: (updated_alias, next_id) = _set_alias(alias) if updated_alias: - simulation.meta.append(models_meta.MetaData("seqid", next_id)) + simulation.set_meta("seqid", next_id) simulation.alias = updated_alias else: simulation.alias = alias From 40f93a357b0f8adcd2b9422aae8eb167ed1df270 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 11:02:12 +0100 Subject: [PATCH 09/24] take out limit Header since its fixed in another pr --- src/simdb/remote/apis/v1/simulations.py | 3 --- src/simdb/remote/apis/v1_1/simulations.py | 5 ----- 2 files changed, 8 deletions(-) diff --git a/src/simdb/remote/apis/v1/simulations.py b/src/simdb/remote/apis/v1/simulations.py index c8ca78a9..9ecbb34f 100644 --- a/src/simdb/remote/apis/v1/simulations.py +++ b/src/simdb/remote/apis/v1/simulations.py @@ -122,9 +122,6 @@ def _build_trace(sim_id: str) -> dict: @api.route("/simulations") class SimulationList(Resource): - LIMIT_HEADER = APIConstants.LIMIT_HEADER - PAGE_HEADER = APIConstants.PAGE_HEADER - parser = api.parser() parser.add_argument( APIConstants.LIMIT_HEADER, diff --git a/src/simdb/remote/apis/v1_1/simulations.py b/src/simdb/remote/apis/v1_1/simulations.py index a10a211c..28d946e3 100644 --- a/src/simdb/remote/apis/v1_1/simulations.py +++ b/src/simdb/remote/apis/v1_1/simulations.py @@ -125,11 +125,6 @@ def _build_trace(sim_id: str) -> dict: @api.route("/simulations") class SimulationList(Resource): - LIMIT_HEADER = APIConstants.LIMIT_HEADER - PAGE_HEADER = APIConstants.PAGE_HEADER - SORT_BY_HEADER = APIConstants.SORT_BY_HEADER - SORT_ASC_HEADER = APIConstants.SORT_ASC_HEADER - parser = api.parser() parser.add_argument( APIConstants.LIMIT_HEADER, From 6b3f56c0a00c48ea5890f51343addb42bf12c7ac Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 11:13:13 +0100 Subject: [PATCH 10/24] format --- ...3aa2429_convert_metadata_to_json_column.py | 92 +++++++++++------- src/simdb/database/database.py | 96 +++++++++++-------- src/simdb/database/models/simulation.py | 18 ++-- 3 files changed, 122 insertions(+), 84 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index f8891d2a..20fb4147 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -5,6 +5,7 @@ Create Date: 2026-02-26 17:01:30.925750 """ + from typing import Sequence, Union from alembic import op @@ -12,8 +13,8 @@ from sqlalchemy import text from sqlalchemy.dialects import postgresql -revision: str = '28bee3aa2429' -down_revision: Union[str, Sequence[str], None] = '9e9a4a7cd639' +revision: str = "28bee3aa2429" +down_revision: Union[str, Sequence[str], None] = "9e9a4a7cd639" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -21,17 +22,22 @@ def upgrade() -> None: """Upgrade schema.""" conn = op.get_bind() - + # Add metadata JSON column to simulations table # Use JSON type for PostgreSQL, Text for SQLite (will store JSON as text) - if conn.dialect.name == 'postgresql': - op.add_column('simulations', sa.Column('metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True)) + if conn.dialect.name == "postgresql": + op.add_column( + "simulations", + sa.Column( + "metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + ) else: - op.add_column('simulations', sa.Column('metadata', sa.Text(), nullable=True)) - + op.add_column("simulations", sa.Column("metadata", sa.Text(), nullable=True)) + # Migrate existing metadata from metadata table to JSON column # First, we need to aggregate metadata by simulation - if conn.dialect.name == 'postgresql': + if conn.dialect.name == "postgresql": # PostgreSQL: Use json_object_agg migration_query = text(""" UPDATE simulations @@ -49,57 +55,63 @@ def upgrade() -> None: # This is more complex, we'll handle it per simulation result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) sim_ids = [row[0] for row in result] - + for sim_id in sim_ids: # Get all metadata for this simulation meta_rows = conn.execute( text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), - {"sim_id": sim_id} + {"sim_id": sim_id}, ) - + # Build JSON object import json import pickle + meta_dict = {} for element, value in meta_rows: # Value is stored as pickle, need to deserialize if value is not None: try: - meta_dict[element] = pickle.loads(value) if isinstance(value, bytes) else value + meta_dict[element] = ( + pickle.loads(value) if isinstance(value, bytes) else value + ) except: meta_dict[element] = value else: meta_dict[element] = None - + conn.execute( text("UPDATE simulations SET metadata = :metadata WHERE id = :sim_id"), - {"metadata": json.dumps(meta_dict), "sim_id": sim_id} + {"metadata": json.dumps(meta_dict), "sim_id": sim_id}, ) - - op.drop_index('metadata_index', table_name='metadata') - op.drop_index(op.f('ix_metadata_sim_id'), table_name='metadata') - op.drop_table('metadata') + + op.drop_index("metadata_index", table_name="metadata") + op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") + op.drop_table("metadata") def downgrade() -> None: """Downgrade schema.""" conn = op.get_bind() - + # Recreate metadata table op.create_table( - 'metadata', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('sim_id', sa.Integer(), nullable=True), - sa.Column('element', sa.String(length=250), nullable=False), - sa.Column('value', sa.PickleType(), nullable=True), - sa.ForeignKeyConstraint(['sim_id'], ['simulations.id'], ), - sa.PrimaryKeyConstraint('id') + "metadata", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("sim_id", sa.Integer(), nullable=True), + sa.Column("element", sa.String(length=250), nullable=False), + sa.Column("value", sa.PickleType(), nullable=True), + sa.ForeignKeyConstraint( + ["sim_id"], + ["simulations.id"], + ), + sa.PrimaryKeyConstraint("id"), ) - op.create_index(op.f('ix_metadata_sim_id'), 'metadata', ['sim_id'], unique=False) - op.create_index('metadata_index', 'metadata', ['sim_id', 'element'], unique=True) - + op.create_index(op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False) + op.create_index("metadata_index", "metadata", ["sim_id", "element"], unique=True) + # Migrate data back from JSON column to metadata table - if conn.dialect.name == 'postgresql': + if conn.dialect.name == "postgresql": migration_query = text(""" INSERT INTO metadata (sim_id, element, value) SELECT s.id, kv.key, kv.value::text @@ -111,8 +123,10 @@ def downgrade() -> None: # SQLite: Parse JSON and insert rows import json import pickle - - result = conn.execute(text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL")) + + result = conn.execute( + text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL") + ) for sim_id, metadata_json in result: if metadata_json: try: @@ -121,10 +135,16 @@ def downgrade() -> None: # Pickle the value for storage pickled_value = pickle.dumps(value, 0) conn.execute( - text("INSERT INTO metadata (sim_id, element, value) VALUES (:sim_id, :element, :value)"), - {"sim_id": sim_id, "element": element, "value": pickled_value} + text( + "INSERT INTO metadata (sim_id, element, value) VALUES (:sim_id, :element, :value)" + ), + { + "sim_id": sim_id, + "element": element, + "value": pickled_value, + }, ) except: pass - - op.drop_column('simulations', 'metadata') + + op.drop_column("simulations", "metadata") diff --git a/src/simdb/database/database.py b/src/simdb/database/database.py index ac61725b..e44c996d 100644 --- a/src/simdb/database/database.py +++ b/src/simdb/database/database.py @@ -176,10 +176,12 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def _get_simulation_data(self, query, meta_keys, limit, page, sort_by="", sort_asc=False) -> Tuple[int, List]: + def _get_simulation_data( + self, query, meta_keys, limit, page, sort_by="", sort_asc=False + ) -> Tuple[int, List]: """ Build simulation data from query results with JSON metadata. - + :param query: SQLAlchemy query object :param meta_keys: List of metadata keys to include :param limit: Maximum number of results per page @@ -189,9 +191,9 @@ def _get_simulation_data(self, query, meta_keys, limit, page, sort_by="", sort_a :return: Tuple of (total_count, list of simulation dicts) """ total_count = query.count() - + all_rows = query.all() - + results = [] for row in all_rows: sim_data = { @@ -199,7 +201,7 @@ def _get_simulation_data(self, query, meta_keys, limit, page, sort_by="", sort_a "uuid": row.uuid, "datetime": row.datetime.isoformat(), } - + metadata_json = row._metadata if metadata_json: if isinstance(metadata_json, str): @@ -211,19 +213,20 @@ def _get_simulation_data(self, query, meta_keys, limit, page, sort_by="", sort_a meta_dict = metadata_json if isinstance(metadata_json, dict) else {} else: meta_dict = {} - + sim_data["_meta_dict"] = meta_dict - + if meta_keys: sim_data["metadata"] = [ {"element": k, "value": v} for k, v in meta_dict.items() if k in meta_keys ] - + results.append(sim_data) - + if sort_by: + def get_sort_key(item): if sort_by in ("alias", "uuid", "datetime"): val = item.get(sort_by, "") @@ -234,26 +237,24 @@ def get_sort_key(item): return ("", "") if sort_asc else ("~", "~") # Convert to string for consistent sorting return str(val).lower() if isinstance(val, str) else str(val) - + results.sort(key=get_sort_key, reverse=not sort_asc) - + for sim_data in results: sim_data.pop("_meta_dict", None) - + if limit: start_idx = (page - 1) * limit end_idx = start_idx + limit results = results[start_idx:end_idx] - + return total_count, results def _find_simulation(self, sim_ref: str) -> "Simulation": try: sim_uuid = uuid.UUID(sim_ref) simulation = ( - self.session.query(Simulation) - .filter_by(uuid=sim_uuid) - .one_or_none() + self.session.query(Simulation).filter_by(uuid=sim_uuid).one_or_none() ) except ValueError: try: @@ -320,8 +321,10 @@ def list_simulation_data( :return: A tuple of (total_count, list of simulation data dicts). """ query = self.session.query(Simulation) - - return self._get_simulation_data(query, meta_keys, limit, page, sort_by, sort_asc) + + return self._get_simulation_data( + query, meta_keys, limit, page, sort_by, sort_asc + ) def get_simulation_data(self, query): limit_query = query @@ -355,13 +358,18 @@ def delete_simulation(self, sim_ref: str) -> "Simulation": def _get_sim_ids_from_json( self, constraints: List[Tuple[str, str, "QueryType"]] ) -> Iterable[int]: - query = self.session.query(Simulation.id, Simulation._metadata, - Simulation.alias, Simulation.uuid, Simulation.datetime) - + query = self.session.query( + Simulation.id, + Simulation._metadata, + Simulation.alias, + Simulation.uuid, + Simulation.datetime, + ) + sim_id_sets = {} for name, value, query_type in constraints: sim_id_sets[(name, value, query_type)] = set() - + for name, value, query_type in constraints: if name == "alias": if query_type == QueryType.EQ: @@ -383,14 +391,16 @@ def _get_sim_ids_from_json( ) elif query_type == QueryType.NI: query = query.filter( - func.REPLACE(sql_cast(Simulation.uuid, String), "-", "").notilike( - "%{}%".format(value.replace("-", "")) - ) + func.REPLACE( + sql_cast(Simulation.uuid, String), "-", "" + ).notilike("%{}%".format(value.replace("-", ""))) ) elif query_type == QueryType.NE: query = query.filter(Simulation.uuid != uuid.UUID(value)) elif name == "creation_date": - date_time = datetime.strptime(value.replace("_", ":"), "%Y-%m-%d %H:%M:%S") + date_time = datetime.strptime( + value.replace("_", ":"), "%Y-%m-%d %H:%M:%S" + ) if query_type == QueryType.EQ: query = query.filter(Simulation.datetime == date_time) elif query_type == QueryType.GT: @@ -403,10 +413,10 @@ def _get_sim_ids_from_json( query = query.filter(Simulation.datetime <= date_time) elif query_type == QueryType.NE: query = query.filter(Simulation.datetime != date_time) - + # Execute query and filter on JSON metadata in Python rows = query.all() - + for row in rows: if row._metadata: if isinstance(row._metadata, str): @@ -418,7 +428,7 @@ def _get_sim_ids_from_json( meta_dict = row._metadata if isinstance(row._metadata, dict) else {} else: meta_dict = {} - + for name, value, query_type in constraints: if name in ("alias", "uuid", "creation_date"): sim_id_sets[(name, value, query_type)].add(row.id) @@ -427,10 +437,10 @@ def _get_sim_ids_from_json( query_type, name, meta_dict[name], value ): sim_id_sets[(name, value, query_type)].add(row.id) - + if sim_id_sets: return set.intersection(*sim_id_sets.values()) - + return [] def query_meta( @@ -469,8 +479,10 @@ def query_meta_data( return 0, [] query = self.session.query(Simulation).filter(Simulation.id.in_(sim_ids)) - - return self._get_simulation_data(query, meta_keys, limit, page, sort_by, sort_asc) + + return self._get_simulation_data( + query, meta_keys, limit, page, sort_by, sort_asc + ) def get_simulation(self, sim_ref: str) -> "Simulation": """ @@ -563,7 +575,7 @@ def list_watchers(self, sim_ref: str) -> List["Watcher"]: def list_metadata_keys(self) -> List[dict]: simulations = self.session.query(Simulation._metadata).all() - + keys_dict = {} for (metadata_json,) in simulations: if metadata_json: @@ -574,11 +586,11 @@ def list_metadata_keys(self) -> List[dict]: continue else: meta_dict = metadata_json if isinstance(metadata_json, dict) else {} - + for key, value in meta_dict.items(): if key not in keys_dict: keys_dict[key] = value - + return [{"name": k, "type": type(v).__name__} for k, v in keys_dict.items()] def list_metadata_values(self, name: str) -> List[str]: @@ -590,7 +602,7 @@ def list_metadata_values(self, name: str) -> List[str]: else: simulations = self.session.query(Simulation._metadata).all() values_set = set() - + for (metadata_json,) in simulations: if metadata_json: if isinstance(metadata_json, str): @@ -599,14 +611,16 @@ def list_metadata_values(self, name: str) -> List[str]: except (json.JSONDecodeError, TypeError): continue else: - meta_dict = metadata_json if isinstance(metadata_json, dict) else {} - + meta_dict = ( + metadata_json if isinstance(metadata_json, dict) else {} + ) + if name in meta_dict: val = meta_dict[name] values_set.add(str(val) if val is not None else None) - + data = list(values_set) - + try: return sorted(data) except TypeError: diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index abbc6fa1..e78b8cfc 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -83,11 +83,11 @@ def _update_legacy_uri(data_object: DataObject): class MetaDataWrapper: """Temporary wrapper class to provide backwards compatibility with MetaData interface.""" - + def __init__(self, element: str, value: Any): self.element = element self.value = value - + def data(self, recurse: bool = False) -> Dict[str, Any]: return {"element": self.element, "value": self.value} @@ -113,9 +113,11 @@ class Status(Enum): datetime = Column(sql_types.DateTime, nullable=False) _metadata = Column( "metadata", - postgresql.JSON(astext_type=sql_types.Text()).with_variant(sql_types.Text(), "sqlite"), + postgresql.JSON(astext_type=sql_types.Text()).with_variant( + sql_types.Text(), "sqlite" + ), nullable=True, - default=dict + default=dict, ) inputs: List["File"] = relationship( "File", secondary=simulation_input_files, backref="input_for" @@ -135,7 +137,7 @@ def meta(self) -> List[MetaDataWrapper]: """ meta_dict = self._get_metadata_dict() return [MetaDataWrapper(k, v) for k, v in meta_dict.items()] - + @meta.setter def meta(self, value: List): """Setter for backwards compatibility - not typically used.""" @@ -150,7 +152,7 @@ def _get_metadata_dict(self) -> Dict[str, Any]: except (json.JSONDecodeError, TypeError): return {} return self._metadata if isinstance(self._metadata, dict) else {} - + def _set_metadata_dict(self, meta_dict: Dict[str, Any]) -> None: self._metadata = meta_dict @@ -387,7 +389,9 @@ def data( data["inputs"] = [f.data(recurse=True) for f in self.inputs] data["outputs"] = [f.data(recurse=True) for f in self.outputs] meta_dict = self._get_metadata_dict() - data["metadata"] = [{"element": k, "value": v} for k, v in meta_dict.items()] + data["metadata"] = [ + {"element": k, "value": v} for k, v in meta_dict.items() + ] elif meta_keys: meta_dict = self._get_metadata_dict() data["metadata"] = [ From 35a4cf7121a26f126bc8bee8b4a0361934672583 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 11:26:46 +0100 Subject: [PATCH 11/24] lint --- ...3aa2429_convert_metadata_to_json_column.py | 20 ++++++++----------- src/simdb/database/database.py | 19 +++++++++--------- src/simdb/database/models/simulation.py | 3 +-- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 20fb4147..c647897f 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -6,13 +6,16 @@ """ +import json +import pickle from typing import Sequence, Union -from alembic import op import sqlalchemy as sa from sqlalchemy import text from sqlalchemy.dialects import postgresql +from alembic import op + revision: str = "28bee3aa2429" down_revision: Union[str, Sequence[str], None] = "9e9a4a7cd639" branch_labels: Union[str, Sequence[str], None] = None @@ -63,10 +66,6 @@ def upgrade() -> None: {"sim_id": sim_id}, ) - # Build JSON object - import json - import pickle - meta_dict = {} for element, value in meta_rows: # Value is stored as pickle, need to deserialize @@ -75,7 +74,7 @@ def upgrade() -> None: meta_dict[element] = ( pickle.loads(value) if isinstance(value, bytes) else value ) - except: + except Exception: meta_dict[element] = value else: meta_dict[element] = None @@ -120,10 +119,6 @@ def downgrade() -> None: """) conn.execute(migration_query) else: - # SQLite: Parse JSON and insert rows - import json - import pickle - result = conn.execute( text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL") ) @@ -136,7 +131,8 @@ def downgrade() -> None: pickled_value = pickle.dumps(value, 0) conn.execute( text( - "INSERT INTO metadata (sim_id, element, value) VALUES (:sim_id, :element, :value)" + "INSERT INTO metadata (sim_id, element, value) " \ + "VALUES (:sim_id, :element, :value)" ), { "sim_id": sim_id, @@ -144,7 +140,7 @@ def downgrade() -> None: "value": pickled_value, }, ) - except: + except Exception: pass op.drop_column("simulations", "metadata") diff --git a/src/simdb/database/database.py b/src/simdb/database/database.py index e44c996d..7d25babc 100644 --- a/src/simdb/database/database.py +++ b/src/simdb/database/database.py @@ -9,11 +9,11 @@ import appdirs import sqlalchemy.orm -from sqlalchemy import String, Text, asc, create_engine, desc, func, or_ +from sqlalchemy import String, Text, create_engine, func from sqlalchemy import cast as sql_cast from sqlalchemy import or_ as sql_or from sqlalchemy.exc import DBAPIError, IntegrityError, SQLAlchemyError -from sqlalchemy.orm import Bundle, joinedload, scoped_session, sessionmaker +from sqlalchemy.orm import scoped_session, sessionmaker from simdb.config import Config from simdb.query import QueryType, query_compare @@ -186,7 +186,7 @@ def _get_simulation_data( :param meta_keys: List of metadata keys to include :param limit: Maximum number of results per page :param page: Page number (1-indexed) - :param sort_by: Field name to sort by (can be alias, uuid, datetime, or a metadata key) + :param sort_by: Field name to sort by (can be alias/uuid/datetime/metadata key) :param sort_asc: Sort in ascending order if True, descending if False :return: Tuple of (total_count, list of simulation dicts) """ @@ -430,13 +430,14 @@ def _get_sim_ids_from_json( meta_dict = {} for name, value, query_type in constraints: - if name in ("alias", "uuid", "creation_date"): + if name in ("alias", "uuid", "creation_date") or ( + name in meta_dict + and ( + query_type == QueryType.EXIST + or query_compare(query_type, name, meta_dict[name], value) + ) + ): sim_id_sets[(name, value, query_type)].add(row.id) - elif name in meta_dict: - if query_type == QueryType.EXIST or query_compare( - query_type, name, meta_dict[name], value - ): - sim_id_sets[(name, value, query_type)].add(row.id) if sim_id_sets: return set.intersection(*sim_id_sets.values()) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index e78b8cfc..1ab4afa4 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -2,7 +2,6 @@ import json import sys import uuid -from collections import defaultdict from collections.abc import Iterable from datetime import datetime from enum import Enum @@ -82,7 +81,7 @@ def _update_legacy_uri(data_object: DataObject): class MetaDataWrapper: - """Temporary wrapper class to provide backwards compatibility with MetaData interface.""" + """Temporary wrapper to provide backwards compatibility with MetaData interface.""" def __init__(self, element: str, value: Any): self.element = element From 886895c47b564c1af401e501d64ef1df8d52a3ec Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 12:12:39 +0100 Subject: [PATCH 12/24] use sql statements to only update specific field instead of whole json --- ...3aa2429_convert_metadata_to_json_column.py | 2 +- src/simdb/database/models/simulation.py | 75 +++++++++++++++++-- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index c647897f..2be7554f 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -131,7 +131,7 @@ def downgrade() -> None: pickled_value = pickle.dumps(value, 0) conn.execute( text( - "INSERT INTO metadata (sim_id, element, value) " \ + "INSERT INTO metadata (sim_id, element, value) " "VALUES (:sim_id, :element, :value)" ), { diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 1ab4afa4..5a3544ba 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -9,6 +9,9 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Set, Union +from sqlalchemy import func, update +from sqlalchemy.orm import object_session + if sys.version_info < (3, 11): from backports.datetime_fromisoformat import MonkeyPatch @@ -305,17 +308,75 @@ def find_meta(self, name: str) -> List[Any]: return [meta_dict[name]] return [] - def remove_meta(self, name: str) -> None: - meta_dict = self._get_metadata_dict() - if name in meta_dict: - del meta_dict[name] - self._set_metadata_dict(meta_dict) + def _update_meta_sql(self, name: str, value: Any, op: str) -> bool: + """ + Try to update metadata via SQL JSON functions. + Returns True if SQL path succeeded, False if fallback should be used. + op: "set" | "remove" + """ + session = object_session(self) + if not (session and self.id): + return False + + dialect = session.bind.dialect.name + + try: + if dialect == "postgresql": + if op == "remove": + expr = Simulation._metadata.op("-")(name) + else: # set + expr = Simulation._metadata.op("||")(json.dumps({name: value})) + + session.execute( + update(Simulation) + .where(Simulation.id == self.id) + .values(_metadata=expr) + ) - def set_meta(self, name: str, value: Any) -> None: + elif dialect == "sqlite": + if op == "remove": + expr = func.json_remove(Simulation._metadata, f"$.{name}") + else: # set + expr = func.json_set( + func.coalesce(Simulation._metadata, "{}"), + f"$.{name}", + json.dumps(value), + ) + + session.execute( + update(Simulation) + .where(Simulation.id == self.id) + .values(_metadata=expr) + ) + + else: + return False + + session.flush() + session.expire(self, ["_metadata"]) + return True + + except Exception: + return False + + def _update_meta_python(self, name: str, value: Any, op: str) -> None: meta_dict = self._get_metadata_dict() - meta_dict[name] = value + + if op == "remove": + meta_dict.pop(name, None) + else: # set + meta_dict[name] = value + self._set_metadata_dict(meta_dict) + def remove_meta(self, name: str) -> None: + if not self._update_meta_sql(name, None, "remove"): + self._update_meta_python(name, None, "remove") + + def set_meta(self, name: str, value: Any) -> None: + if not self._update_meta_sql(name, value, "set"): + self._update_meta_python(name, value, "set") + def validate_meta(self) -> None: """ Check the metadata elements for duplicates, throwing an exception if found. From 0f24002647b42da67863aab8e7340860e501aa2b Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 14:08:42 +0100 Subject: [PATCH 13/24] use MutableDict --- ...3aa2429_convert_metadata_to_json_column.py | 6 +- src/simdb/database/models/simulation.py | 82 +++---------------- 2 files changed, 15 insertions(+), 73 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 2be7554f..6c34209f 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -26,13 +26,13 @@ def upgrade() -> None: """Upgrade schema.""" conn = op.get_bind() - # Add metadata JSON column to simulations table - # Use JSON type for PostgreSQL, Text for SQLite (will store JSON as text) + # Add metadata JSONB column to simulations table + # Use JSONB type for PostgreSQL, Text for SQLite (will store JSON as text) if conn.dialect.name == "postgresql": op.add_column( "simulations", sa.Column( - "metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True + "metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True ), ) else: diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 5a3544ba..e4c37247 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -9,15 +9,13 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Set, Union -from sqlalchemy import func, update -from sqlalchemy.orm import object_session - if sys.version_info < (3, 11): from backports.datetime_fromisoformat import MonkeyPatch from sqlalchemy import Column, ForeignKey, Table from sqlalchemy import types as sql_types from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import relationship if "sphinx" in sys.modules: @@ -115,8 +113,10 @@ class Status(Enum): datetime = Column(sql_types.DateTime, nullable=False) _metadata = Column( "metadata", - postgresql.JSON(astext_type=sql_types.Text()).with_variant( - sql_types.Text(), "sqlite" + MutableDict.as_mutable( + postgresql.JSONB(astext_type=sql_types.Text()).with_variant( + sql_types.Text(), "sqlite" + ) ), nullable=True, default=dict, @@ -308,74 +308,16 @@ def find_meta(self, name: str) -> List[Any]: return [meta_dict[name]] return [] - def _update_meta_sql(self, name: str, value: Any, op: str) -> bool: - """ - Try to update metadata via SQL JSON functions. - Returns True if SQL path succeeded, False if fallback should be used. - op: "set" | "remove" - """ - session = object_session(self) - if not (session and self.id): - return False - - dialect = session.bind.dialect.name - - try: - if dialect == "postgresql": - if op == "remove": - expr = Simulation._metadata.op("-")(name) - else: # set - expr = Simulation._metadata.op("||")(json.dumps({name: value})) - - session.execute( - update(Simulation) - .where(Simulation.id == self.id) - .values(_metadata=expr) - ) - - elif dialect == "sqlite": - if op == "remove": - expr = func.json_remove(Simulation._metadata, f"$.{name}") - else: # set - expr = func.json_set( - func.coalesce(Simulation._metadata, "{}"), - f"$.{name}", - json.dumps(value), - ) - - session.execute( - update(Simulation) - .where(Simulation.id == self.id) - .values(_metadata=expr) - ) - - else: - return False - - session.flush() - session.expire(self, ["_metadata"]) - return True - - except Exception: - return False - - def _update_meta_python(self, name: str, value: Any, op: str) -> None: - meta_dict = self._get_metadata_dict() - - if op == "remove": - meta_dict.pop(name, None) - else: # set - meta_dict[name] = value - - self._set_metadata_dict(meta_dict) - def remove_meta(self, name: str) -> None: - if not self._update_meta_sql(name, None, "remove"): - self._update_meta_python(name, None, "remove") + if self._metadata is None: + return + if name in self._metadata: + del self._metadata[name] def set_meta(self, name: str, value: Any) -> None: - if not self._update_meta_sql(name, value, "set"): - self._update_meta_python(name, value, "set") + if self._metadata is None: + self._metadata = {} + self._metadata[name] = value def validate_meta(self) -> None: """ From e28eb77bcc51533f17951537a9e0fbb2d362d169 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 14:12:30 +0100 Subject: [PATCH 14/24] typing errors --- src/simdb/cli/remote_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simdb/cli/remote_api.py b/src/simdb/cli/remote_api.py index 0e388b24..a131c73c 100644 --- a/src/simdb/cli/remote_api.py +++ b/src/simdb/cli/remote_api.py @@ -188,14 +188,14 @@ def __init__( ) self._remote = remote try: - self._url: str = config.get_option(f"remote.{remote}.url") + self._url: str = config.get_string_option(f"remote.{remote}.url") except KeyError: raise ValueError( f"Remote '{remote}' not found. Use `simdb remote config add` to add it." ) from None self._api_url: str = f"{self._url}/v{config.api_version}/" - self._firewall: Optional[str] = config.get_option( + self._firewall: Optional[str] = config.get_string_option( f"remote.{remote}.firewall", default=None ) From 9ffc009dd0a5a7e033406ecf6db86c9ba6270d70 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 27 Feb 2026 14:50:50 +0100 Subject: [PATCH 15/24] fix tests --- src/simdb/database/models/simulation.py | 12 ++---- src/simdb/database/models/types.py | 45 +++++++++++++++++++++++ src/simdb/remote/apis/v1_2/simulations.py | 20 +++++----- tests/remote/api/test_simulations.py | 2 +- 4 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index e4c37247..679bdf52 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -12,9 +12,9 @@ if sys.version_info < (3, 11): from backports.datetime_fromisoformat import MonkeyPatch +from dateutil import parser as date_parser from sqlalchemy import Column, ForeignKey, Table from sqlalchemy import types as sql_types -from sqlalchemy.dialects import postgresql from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import relationship @@ -43,7 +43,7 @@ from .base import Base from .file import File -from .types import UUID +from .types import UUID, JSONType from .utils import checked_get, flatten_dict, unflatten_dict from .watcher import Watcher @@ -113,11 +113,7 @@ class Status(Enum): datetime = Column(sql_types.DateTime, nullable=False) _metadata = Column( "metadata", - MutableDict.as_mutable( - postgresql.JSONB(astext_type=sql_types.Text()).with_variant( - sql_types.Text(), "sqlite" - ) - ), + MutableDict.as_mutable(JSONType), nullable=True, default=dict, ) @@ -361,7 +357,7 @@ def from_data(cls, data: Dict[str, Union[str, Dict, List]]) -> "Simulation": simulation.alias = checked_get(data, "alias", str) if "datetime" not in data: data["datetime"] = datetime.now().isoformat() - simulation.datetime = datetime.fromisoformat(checked_get(data, "datetime", str)) + simulation.datetime = date_parser.parse(checked_get(data, "datetime", str)) if "inputs" in data: inputs = checked_get(data, "inputs", list) simulation.inputs = [File.from_data(el) for el in inputs] diff --git a/src/simdb/database/models/types.py b/src/simdb/database/models/types.py index 24c8f479..b790bfc3 100644 --- a/src/simdb/database/models/types.py +++ b/src/simdb/database/models/types.py @@ -1,4 +1,5 @@ import enum +import json import uuid from typing import Any, Dict, Optional @@ -82,6 +83,50 @@ def process_literal_param(self, value, dialect) -> Optional[urilib.URI]: return self.process_result_value(value, dialect) +class JSONType(sql_types.TypeDecorator): + """ + JSON type that properly handles serialization for SQLite and PostgreSQL. + + Uses PostgreSQL's JSONB type, otherwise uses TEXT with JSON serialization. + """ + + impl = sql_types.Text + cache_ok = True + + @property + def python_type(self): + return dict + + def load_dialect_impl(self, dialect): + if dialect.name == "postgresql": + return dialect.type_descriptor( + postgresql.JSONB(astext_type=sql_types.Text()) + ) + else: + return dialect.type_descriptor(sql_types.Text()) + + def process_bind_param(self, value, dialect): + if value is None: + return value + if dialect.name == "postgresql": + return value + else: + return json.dumps(value) + + def process_result_value(self, value, dialect): + if value is None: + return value + if dialect.name == "postgresql": + return value + else: + if isinstance(value, str): + try: + return json.loads(value) + except (json.JSONDecodeError, TypeError): + return {} + return value + + class ChoiceType(sql_types.TypeDecorator): impl = sql_types.CHAR diff --git a/src/simdb/remote/apis/v1_2/simulations.py b/src/simdb/remote/apis/v1_2/simulations.py index 3d28a12a..bd6e0353 100644 --- a/src/simdb/remote/apis/v1_2/simulations.py +++ b/src/simdb/remote/apis/v1_2/simulations.py @@ -129,7 +129,7 @@ def _build_trace(sim_id: str) -> Dict[str, Any]: status = simulation.find_meta("status") if status: - status_value = status[0].value + status_value = status[0] if isinstance(status_value, str): data["status"] = status_value else: @@ -137,19 +137,19 @@ def _build_trace(sim_id: str) -> Dict[str, Any]: status_on_name = str(data["status"]) + "_on" status_on = simulation.find_meta(status_on_name) if status_on: - data[status_on_name] = status_on[0].value + data[status_on_name] = status_on[0] replaces = simulation.find_meta("replaces") if replaces: - data["replaces"] = _build_trace(replaces[0].value) + data["replaces"] = _build_trace(replaces[0]) replaced_on = simulation.find_meta("replaced_on") if replaced_on: - data["deprecated_on"] = replaced_on[0].value + data["deprecated_on"] = replaced_on[0] replaces_reason = simulation.find_meta("replaces_reason") if replaces_reason: - data["replaces_reason"] = replaces_reason[0].value + data["replaces_reason"] = replaces_reason[0] return data @@ -408,9 +408,9 @@ def post(self, user: User): "development.disable_replaces", default=False ) and replaces - and replaces[0].value + and replaces[0] ): - sim_id = replaces[0].value + sim_id = replaces[0] try: replaces_sim = current_app.db.get_simulation(sim_id) except DatabaseError: @@ -421,7 +421,7 @@ def post(self, user: User): _update_simulation_status( replaces_sim, models_sim.Simulation.Status.DEPRECATED, user ) - replaces_sim.set_meta("replaced_by", simulation.uuid) + replaces_sim.set_meta("replaced_by", simulation.uuid.hex) current_app.db.insert_simulation(replaces_sim) current_app.db.insert_simulation(simulation) @@ -537,7 +537,9 @@ def patch(self, sim_id: str, user: Optional[User] = None): simulation = current_app.db.get_simulation(sim_id) if simulation is None: raise ValueError(f"Simulation {sim_id} not found.") - old_values = [meta.data() for meta in simulation.find_meta(key)] + old_values = [ + {"element": key, "value": v} for v in simulation.find_meta(key) + ] if key.lower() != "status": simulation.set_meta(key, value) else: diff --git a/tests/remote/api/test_simulations.py b/tests/remote/api/test_simulations.py index 2ff30f73..1aae16ab 100644 --- a/tests/remote/api/test_simulations.py +++ b/tests/remote/api/test_simulations.py @@ -170,7 +170,7 @@ def test_post_simulations_with_replaces(client): assert metadata["status"].lower() == "deprecated" # Check replaced_by metadata was added - assert metadata["replaced_by"] == new_simulation_data.simulation.uuid + assert metadata["replaced_by"] == new_simulation_data.simulation.uuid.hex # Verify the new simulation has replaces metadata rv_new_get = client.get( From df068cfd03a26c3d88d805ab23937743a3f029d6 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 09:59:51 +0100 Subject: [PATCH 16/24] check if metadata exists before creating --- ...3aa2429_convert_metadata_to_json_column.py | 121 +++++++++--------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 6c34209f..816b886d 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -25,68 +25,71 @@ def upgrade() -> None: """Upgrade schema.""" conn = op.get_bind() - - # Add metadata JSONB column to simulations table - # Use JSONB type for PostgreSQL, Text for SQLite (will store JSON as text) - if conn.dialect.name == "postgresql": - op.add_column( - "simulations", - sa.Column( - "metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True - ), - ) - else: - op.add_column("simulations", sa.Column("metadata", sa.Text(), nullable=True)) - - # Migrate existing metadata from metadata table to JSON column - # First, we need to aggregate metadata by simulation - if conn.dialect.name == "postgresql": - # PostgreSQL: Use json_object_agg - migration_query = text(""" - UPDATE simulations - SET metadata = subq.meta_json - FROM ( - SELECT sim_id, json_object_agg(element, value) as meta_json - FROM metadata - GROUP BY sim_id - ) AS subq - WHERE simulations.id = subq.sim_id - """) - conn.execute(migration_query) - else: - # SQLite: Build JSON manually using group_concat - # This is more complex, we'll handle it per simulation - result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) - sim_ids = [row[0] for row in result] - - for sim_id in sim_ids: - # Get all metadata for this simulation - meta_rows = conn.execute( - text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), - {"sim_id": sim_id}, + inspector = sa.inspect(conn) + + # Add metadata column only if it doesn't already exist (e.g. created via create_all) + existing_columns = [col["name"] for col in inspector.get_columns("simulations")] + if "metadata" not in existing_columns: + if conn.dialect.name == "postgresql": + op.add_column( + "simulations", + sa.Column( + "metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True + ), ) - - meta_dict = {} - for element, value in meta_rows: - # Value is stored as pickle, need to deserialize - if value is not None: - try: - meta_dict[element] = ( - pickle.loads(value) if isinstance(value, bytes) else value - ) - except Exception: - meta_dict[element] = value - else: - meta_dict[element] = None - - conn.execute( - text("UPDATE simulations SET metadata = :metadata WHERE id = :sim_id"), - {"metadata": json.dumps(meta_dict), "sim_id": sim_id}, + else: + op.add_column( + "simulations", sa.Column("metadata", sa.Text(), nullable=True) ) - op.drop_index("metadata_index", table_name="metadata") - op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") - op.drop_table("metadata") + # Migrate existing data from metadata table if it still exists + if "metadata" in inspector.get_table_names(): + if conn.dialect.name == "postgresql": + migration_query = text(""" + UPDATE simulations + SET metadata = subq.meta_json + FROM ( + SELECT sim_id, json_object_agg(element, value) as meta_json + FROM metadata + GROUP BY sim_id + ) AS subq + WHERE simulations.id = subq.sim_id + """) + conn.execute(migration_query) + else: + result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) + sim_ids = [row[0] for row in result] + + for sim_id in sim_ids: + meta_rows = conn.execute( + text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), + {"sim_id": sim_id}, + ) + + meta_dict = {} + for element, value in meta_rows: + if value is not None: + try: + meta_dict[element] = ( + pickle.loads(value) + if isinstance(value, bytes) + else value + ) + except Exception: + meta_dict[element] = value + else: + meta_dict[element] = None + + conn.execute( + text( + "UPDATE simulations SET metadata = :metadata WHERE id = :sim_id" + ), + {"metadata": json.dumps(meta_dict), "sim_id": sim_id}, + ) + + op.drop_index("metadata_index", table_name="metadata") + op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata") + op.drop_table("metadata") def downgrade() -> None: From 4cf5461bda2e35b1902d2f2713c06b1e7a178bdf Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 10:08:05 +0100 Subject: [PATCH 17/24] remove custom serialization and use sqlalchemy --- src/simdb/database/database.py | 55 ++++--------------------- src/simdb/database/models/simulation.py | 25 ++++------- src/simdb/database/models/types.py | 44 -------------------- 3 files changed, 16 insertions(+), 108 deletions(-) diff --git a/src/simdb/database/database.py b/src/simdb/database/database.py index 7d25babc..8f518a8e 100644 --- a/src/simdb/database/database.py +++ b/src/simdb/database/database.py @@ -1,5 +1,4 @@ import contextlib -import json import sys import uuid from datetime import datetime @@ -202,17 +201,7 @@ def _get_simulation_data( "datetime": row.datetime.isoformat(), } - metadata_json = row._metadata - if metadata_json: - if isinstance(metadata_json, str): - try: - meta_dict = json.loads(metadata_json) - except (json.JSONDecodeError, TypeError): - meta_dict = {} - else: - meta_dict = metadata_json if isinstance(metadata_json, dict) else {} - else: - meta_dict = {} + meta_dict = row._metadata or {} sim_data["_meta_dict"] = meta_dict @@ -418,16 +407,7 @@ def _get_sim_ids_from_json( rows = query.all() for row in rows: - if row._metadata: - if isinstance(row._metadata, str): - try: - meta_dict = json.loads(row._metadata) - except (json.JSONDecodeError, TypeError): - meta_dict = {} - else: - meta_dict = row._metadata if isinstance(row._metadata, dict) else {} - else: - meta_dict = {} + meta_dict = row._metadata or {} for name, value, query_type in constraints: if name in ("alias", "uuid", "creation_date") or ( @@ -578,16 +558,8 @@ def list_metadata_keys(self) -> List[dict]: simulations = self.session.query(Simulation._metadata).all() keys_dict = {} - for (metadata_json,) in simulations: - if metadata_json: - if isinstance(metadata_json, str): - try: - meta_dict = json.loads(metadata_json) - except (json.JSONDecodeError, TypeError): - continue - else: - meta_dict = metadata_json if isinstance(metadata_json, dict) else {} - + for (meta_dict,) in simulations: + if meta_dict: for key, value in meta_dict.items(): if key not in keys_dict: keys_dict[key] = value @@ -604,21 +576,10 @@ def list_metadata_values(self, name: str) -> List[str]: simulations = self.session.query(Simulation._metadata).all() values_set = set() - for (metadata_json,) in simulations: - if metadata_json: - if isinstance(metadata_json, str): - try: - meta_dict = json.loads(metadata_json) - except (json.JSONDecodeError, TypeError): - continue - else: - meta_dict = ( - metadata_json if isinstance(metadata_json, dict) else {} - ) - - if name in meta_dict: - val = meta_dict[name] - values_set.add(str(val) if val is not None else None) + for (meta_dict,) in simulations: + if meta_dict and name in meta_dict: + val = meta_dict[name] + values_set.add(str(val) if val is not None else None) data = list(values_set) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index a390faf2..3c1938e4 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -1,8 +1,6 @@ import itertools -import json import sys import uuid -from collections.abc import Iterable from datetime import datetime from enum import Enum from getpass import getuser @@ -13,8 +11,9 @@ from backports.datetime_fromisoformat import MonkeyPatch from dateutil import parser as date_parser -from sqlalchemy import Column, ForeignKey, Table +from sqlalchemy import JSON, Column, ForeignKey, Table from sqlalchemy import types as sql_types +from sqlalchemy.dialects import postgresql from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import relationship @@ -43,7 +42,7 @@ from .base import Base from .file import File -from .types import UUID, JSONType +from .types import UUID from .utils import checked_get, flatten_dict, unflatten_dict from .watcher import Watcher @@ -113,7 +112,9 @@ class Status(Enum): datetime = Column(sql_types.DateTime, nullable=False) _metadata = Column( "metadata", - MutableDict.as_mutable(JSONType), + MutableDict.as_mutable( + postgresql.JSONB(astext_type=sql_types.Text()).with_variant(JSON(), "sqlite") + ), nullable=True, default=dict, ) @@ -144,12 +145,7 @@ def meta(self, value: List): def _get_metadata_dict(self) -> Dict[str, Any]: if self._metadata is None: return {} - if isinstance(self._metadata, str): - try: - return json.loads(self._metadata) - except (json.JSONDecodeError, TypeError): - return {} - return self._metadata if isinstance(self._metadata, dict) else {} + return self._metadata def _set_metadata_dict(self, meta_dict: Dict[str, Any]) -> None: self._metadata = meta_dict @@ -271,12 +267,7 @@ def __str__(self): result += "metadata:\n" meta_dict = self._get_metadata_dict() for element, value in meta_dict.items(): - if ( - isinstance(value, Iterable) - and not isinstance(value, np.ndarray) - and isinstance(value, str) - and "\n" in value - ): + if isinstance(value, str) and "\n" in value: first_line = True for line in value.split("\n"): if first_line: diff --git a/src/simdb/database/models/types.py b/src/simdb/database/models/types.py index b790bfc3..9751c969 100644 --- a/src/simdb/database/models/types.py +++ b/src/simdb/database/models/types.py @@ -1,5 +1,4 @@ import enum -import json import uuid from typing import Any, Dict, Optional @@ -83,49 +82,6 @@ def process_literal_param(self, value, dialect) -> Optional[urilib.URI]: return self.process_result_value(value, dialect) -class JSONType(sql_types.TypeDecorator): - """ - JSON type that properly handles serialization for SQLite and PostgreSQL. - - Uses PostgreSQL's JSONB type, otherwise uses TEXT with JSON serialization. - """ - - impl = sql_types.Text - cache_ok = True - - @property - def python_type(self): - return dict - - def load_dialect_impl(self, dialect): - if dialect.name == "postgresql": - return dialect.type_descriptor( - postgresql.JSONB(astext_type=sql_types.Text()) - ) - else: - return dialect.type_descriptor(sql_types.Text()) - - def process_bind_param(self, value, dialect): - if value is None: - return value - if dialect.name == "postgresql": - return value - else: - return json.dumps(value) - - def process_result_value(self, value, dialect): - if value is None: - return value - if dialect.name == "postgresql": - return value - else: - if isinstance(value, str): - try: - return json.loads(value) - except (json.JSONDecodeError, TypeError): - return {} - return value - class ChoiceType(sql_types.TypeDecorator): impl = sql_types.CHAR From b5249db4a856fbcaed87c17ce69c24dac4fafdf2 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 10:11:31 +0100 Subject: [PATCH 18/24] formatting --- src/simdb/database/models/simulation.py | 4 +++- src/simdb/database/models/types.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index 3c1938e4..e3a7aadf 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -113,7 +113,9 @@ class Status(Enum): _metadata = Column( "metadata", MutableDict.as_mutable( - postgresql.JSONB(astext_type=sql_types.Text()).with_variant(JSON(), "sqlite") + postgresql.JSONB(astext_type=sql_types.Text()).with_variant( + JSON(), "sqlite" + ) ), nullable=True, default=dict, diff --git a/src/simdb/database/models/types.py b/src/simdb/database/models/types.py index 9751c969..24c8f479 100644 --- a/src/simdb/database/models/types.py +++ b/src/simdb/database/models/types.py @@ -82,7 +82,6 @@ def process_literal_param(self, value, dialect) -> Optional[urilib.URI]: return self.process_result_value(value, dialect) - class ChoiceType(sql_types.TypeDecorator): impl = sql_types.CHAR From 0b62d579a1d0ad15bb38b0b129d12dc8078d1043 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 10:28:25 +0100 Subject: [PATCH 19/24] small fixes --- src/simdb/database/database.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/simdb/database/database.py b/src/simdb/database/database.py index 8f518a8e..5aa25488 100644 --- a/src/simdb/database/database.py +++ b/src/simdb/database/database.py @@ -189,9 +189,8 @@ def _get_simulation_data( :param sort_asc: Sort in ascending order if True, descending if False :return: Tuple of (total_count, list of simulation dicts) """ - total_count = query.count() - all_rows = query.all() + total_count = len(all_rows) results = [] for row in all_rows: @@ -223,8 +222,7 @@ def get_sort_key(item): val = item.get("_meta_dict", {}).get(sort_by, "") # Handle None values - put them at the end if val is None: - return ("", "") if sort_asc else ("~", "~") - # Convert to string for consistent sorting + return "" if sort_asc else "~" return str(val).lower() if isinstance(val, str) else str(val) results.sort(key=get_sort_key, reverse=not sort_asc) @@ -569,7 +567,7 @@ def list_metadata_keys(self) -> List[dict]: def list_metadata_values(self, name: str) -> List[str]: if name == "alias": query = self.session.query(Simulation.alias).filter( - Simulation.alias is not None + Simulation.alias.isnot(None) ) data = [row[0] for row in query.all()] else: From 15858f1055b3d8d4c27881e21c5f897612526ceb Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 16:44:25 +0100 Subject: [PATCH 20/24] yannick comments --- ...3aa2429_convert_metadata_to_json_column.py | 63 +++++++++---------- src/simdb/database/models/simulation.py | 5 -- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 816b886d..324370d2 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -39,47 +39,42 @@ def upgrade() -> None: ) else: op.add_column( - "simulations", sa.Column("metadata", sa.Text(), nullable=True) + "simulations", sa.Column("metadata", sa.JSON(), nullable=True) ) # Migrate existing data from metadata table if it still exists if "metadata" in inspector.get_table_names(): - if conn.dialect.name == "postgresql": - migration_query = text(""" - UPDATE simulations - SET metadata = subq.meta_json - FROM ( - SELECT sim_id, json_object_agg(element, value) as meta_json - FROM metadata - GROUP BY sim_id - ) AS subq - WHERE simulations.id = subq.sim_id - """) - conn.execute(migration_query) - else: - result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) - sim_ids = [row[0] for row in result] + result = conn.execute(text("SELECT DISTINCT sim_id FROM metadata")) + sim_ids = [row[0] for row in result] - for sim_id in sim_ids: - meta_rows = conn.execute( - text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), - {"sim_id": sim_id}, - ) + for sim_id in sim_ids: + meta_rows = conn.execute( + text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"), + {"sim_id": sim_id}, + ) - meta_dict = {} - for element, value in meta_rows: - if value is not None: - try: - meta_dict[element] = ( - pickle.loads(value) - if isinstance(value, bytes) - else value - ) - except Exception: - meta_dict[element] = value - else: - meta_dict[element] = None + meta_dict = {} + for element, value in meta_rows: + if value is not None: + try: + meta_dict[element] = ( + pickle.loads(value) + if isinstance(value, bytes) + else value + ) + except Exception: + meta_dict[element] = value + else: + meta_dict[element] = None + if conn.dialect.name == "postgresql": + conn.execute( + text( + "UPDATE simulations SET metadata = :metadata::jsonb WHERE id = :sim_id" + ), + {"metadata": json.dumps(meta_dict), "sim_id": sim_id}, + ) + else: conn.execute( text( "UPDATE simulations SET metadata = :metadata WHERE id = :sim_id" diff --git a/src/simdb/database/models/simulation.py b/src/simdb/database/models/simulation.py index e3a7aadf..fff57b6e 100644 --- a/src/simdb/database/models/simulation.py +++ b/src/simdb/database/models/simulation.py @@ -139,11 +139,6 @@ def meta(self) -> List[MetaDataWrapper]: meta_dict = self._get_metadata_dict() return [MetaDataWrapper(k, v) for k, v in meta_dict.items()] - @meta.setter - def meta(self, value: List): - """Setter for backwards compatibility - not typically used.""" - pass - def _get_metadata_dict(self) -> Dict[str, Any]: if self._metadata is None: return {} From 7850422a2f029b50e5a9fa3b4d7fac25ce14a0b4 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 16:46:13 +0100 Subject: [PATCH 21/24] format --- .../versions/28bee3aa2429_convert_metadata_to_json_column.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 324370d2..2b8e79b3 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -58,9 +58,7 @@ def upgrade() -> None: if value is not None: try: meta_dict[element] = ( - pickle.loads(value) - if isinstance(value, bytes) - else value + pickle.loads(value) if isinstance(value, bytes) else value ) except Exception: meta_dict[element] = value From a83b1c0703e41a658f143649328cca09d5fe6415 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Fri, 13 Mar 2026 16:51:43 +0100 Subject: [PATCH 22/24] linting --- .../versions/28bee3aa2429_convert_metadata_to_json_column.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 2b8e79b3..4cf9eaa2 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -68,7 +68,8 @@ def upgrade() -> None: if conn.dialect.name == "postgresql": conn.execute( text( - "UPDATE simulations SET metadata = :metadata::jsonb WHERE id = :sim_id" + "UPDATE simulations SET metadata = :metadata::jsonb" + " WHERE id = :sim_id" ), {"metadata": json.dumps(meta_dict), "sim_id": sim_id}, ) From 3c1eacd4709202facbdd94c55a8ad91f2664bf13 Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Thu, 19 Mar 2026 15:36:06 +0100 Subject: [PATCH 23/24] add json serializable --- ...3aa2429_convert_metadata_to_json_column.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 4cf9eaa2..2b6b2b0c 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -8,7 +8,7 @@ import json import pickle -from typing import Sequence, Union +from typing import Any, Sequence, Union import sqlalchemy as sa from sqlalchemy import text @@ -22,6 +22,23 @@ depends_on: Union[str, Sequence[str], None] = None + +def _make_json_serializable(value: Any) -> Any: + """Recursively convert a value to something JSON-serializable. + + Numpy arrays fall through to str(), which uses numpy's print threshold and + truncates large arrays — avoiding multi-hundred-MB JSON for array-valued metadata. + """ + if value is None or isinstance(value, (str, int, float, bool)): + return value + if isinstance(value, (list, tuple)): + return [_make_json_serializable(v) for v in value] + if isinstance(value, dict): + return {str(k): _make_json_serializable(v) for k, v in value.items()} + # Covers numpy arrays (truncated by numpy's print threshold), datetimes, etc. + return str(value) + + def upgrade() -> None: """Upgrade schema.""" conn = op.get_bind() @@ -57,11 +74,12 @@ def upgrade() -> None: for element, value in meta_rows: if value is not None: try: - meta_dict[element] = ( + unpickled = ( pickle.loads(value) if isinstance(value, bytes) else value ) except Exception: - meta_dict[element] = value + unpickled = repr(value) + meta_dict[element] = _make_json_serializable(unpickled) else: meta_dict[element] = None From 90864886cd3a31565df740fa56e47f7ab8ccd3ea Mon Sep 17 00:00:00 2001 From: Alexandra Ioan Date: Thu, 19 Mar 2026 15:38:41 +0100 Subject: [PATCH 24/24] ruff --- alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py index 2b6b2b0c..b262fa0b 100644 --- a/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py +++ b/alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py @@ -22,7 +22,6 @@ depends_on: Union[str, Sequence[str], None] = None - def _make_json_serializable(value: Any) -> Any: """Recursively convert a value to something JSON-serializable.