Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a1ab4f2
add alembic migration
ioan-alexandra Feb 10, 2026
51708f7
add alembic to pyproject, delete db and adjust migration
ioan-alexandra Feb 10, 2026
bb7523e
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Feb 11, 2026
7f65c43
format
ioan-alexandra Feb 11, 2026
5bad088
lint
ioan-alexandra Feb 11, 2026
7846265
redo autogenerate alembic to add files
ioan-alexandra Feb 13, 2026
3712954
lint
ioan-alexandra Feb 13, 2026
dbc96bc
let Base.metadata handle all models
ioan-alexandra Feb 13, 2026
0d6008d
Merge branch 'develop' of github.com:ioan-alexandra/SimDB into develop
ioan-alexandra Feb 26, 2026
1d5b15b
switch from metadata field to json column
ioan-alexandra Feb 27, 2026
40f93a3
take out limit Header since it's fixed in another PR
ioan-alexandra Feb 27, 2026
6b3f56c
format
ioan-alexandra Feb 27, 2026
35a4cf7
lint
ioan-alexandra Feb 27, 2026
886895c
use sql statements to only update specific field instead of whole json
ioan-alexandra Feb 27, 2026
0f24002
use MutableDict
ioan-alexandra Feb 27, 2026
e28eb77
typing errors
ioan-alexandra Feb 27, 2026
9ffc009
fix tests
ioan-alexandra Feb 27, 2026
8e75576
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Mar 5, 2026
a47eb5c
Merge branch 'iterorganization:develop' into develop
ioan-alexandra Mar 13, 2026
df068cf
check if metadata exists before creating
ioan-alexandra Mar 13, 2026
4cf5461
remove custom serialization and use sqlalchemy
ioan-alexandra Mar 13, 2026
b5249db
formatting
ioan-alexandra Mar 13, 2026
0b62d57
small fixes
ioan-alexandra Mar 13, 2026
15858f1
yannick comments
ioan-alexandra Mar 13, 2026
7850422
format
ioan-alexandra Mar 13, 2026
a83b1c0
linting
ioan-alexandra Mar 13, 2026
3c1eacd
add json serializable
ioan-alexandra Mar 19, 2026
9086488
ruff
ioan-alexandra Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions alembic/versions/28bee3aa2429_convert_metadata_to_json_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""convert_metadata_to_json_column

Revision ID: 28bee3aa2429
Revises: 9e9a4a7cd639
Create Date: 2026-02-26 17:01:30.925750

"""

import json
import pickle
from typing import Any, Sequence, Union

import sqlalchemy as sa
from sqlalchemy import text
from sqlalchemy.dialects import postgresql

from alembic import op

# Alembic revision identifiers; ``down_revision`` points at the migration this
# one builds on top of.
revision: str = "28bee3aa2429"
down_revision: Union[str, Sequence[str], None] = "9e9a4a7cd639"
# Unused by this revision: no branch labels and no cross-branch dependencies.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _make_json_serializable(value: Any) -> Any:
"""Recursively convert a value to something JSON-serializable.

Numpy arrays fall through to str(), which uses numpy's print threshold and
truncates large arrays — avoiding multi-hundred-MB JSON for array-valued metadata.
"""
if value is None or isinstance(value, (str, int, float, bool)):
return value
if isinstance(value, (list, tuple)):
return [_make_json_serializable(v) for v in value]
if isinstance(value, dict):
return {str(k): _make_json_serializable(v) for k, v in value.items()}
# Covers numpy arrays (truncated by numpy's print threshold), datetimes, etc.
return str(value)


def upgrade() -> None:
    """Upgrade schema.

    Adds a JSON ``metadata`` column to ``simulations`` (JSONB on PostgreSQL),
    copies every row of the legacy ``metadata`` key/value table into it, and
    finally drops the legacy table together with its indexes.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    is_postgres = conn.dialect.name == "postgresql"

    # The column may already exist when the schema was produced through
    # ``Base.metadata.create_all`` — only add it when it is missing.
    simulation_columns = {col["name"] for col in inspector.get_columns("simulations")}
    if "metadata" not in simulation_columns:
        column_type = (
            postgresql.JSONB(astext_type=sa.Text()) if is_postgres else sa.JSON()
        )
        op.add_column("simulations", sa.Column("metadata", column_type, nullable=True))

    # Nothing to migrate if the legacy key/value table is already gone.
    if "metadata" not in inspector.get_table_names():
        return

    sim_ids = [
        row[0] for row in conn.execute(text("SELECT DISTINCT sim_id FROM metadata"))
    ]

    # PostgreSQL needs the bound JSON string cast to jsonb; other backends
    # accept the serialized text directly in a JSON column.
    if is_postgres:
        update_stmt = text(
            "UPDATE simulations SET metadata = :metadata::jsonb WHERE id = :sim_id"
        )
    else:
        update_stmt = text(
            "UPDATE simulations SET metadata = :metadata WHERE id = :sim_id"
        )

    for sim_id in sim_ids:
        rows = conn.execute(
            text("SELECT element, value FROM metadata WHERE sim_id = :sim_id"),
            {"sim_id": sim_id},
        )

        meta_dict = {}
        for element, value in rows:
            if value is None:
                meta_dict[element] = None
                continue
            try:
                # Legacy values were stored via PickleType and arrive as bytes;
                # trusted data written by this application itself.
                payload = pickle.loads(value) if isinstance(value, bytes) else value
            except Exception:
                # Unreadable pickle: keep a best-effort textual representation.
                payload = repr(value)
            meta_dict[element] = _make_json_serializable(payload)

        conn.execute(update_stmt, {"metadata": json.dumps(meta_dict), "sim_id": sim_id})

    # The legacy table and its two indexes are no longer needed.
    op.drop_index("metadata_index", table_name="metadata")
    op.drop_index(op.f("ix_metadata_sim_id"), table_name="metadata")
    op.drop_table("metadata")


def downgrade() -> None:
    """Downgrade schema.

    Recreates the legacy ``metadata`` key/value table, copies every entry of
    the ``simulations.metadata`` JSON column back into it (pickled, as the
    ``PickleType`` column expects), and drops the JSON column.
    """
    conn = op.get_bind()

    # Recreate the legacy metadata table exactly as it existed before the
    # upgrade, including both of its indexes.
    op.create_table(
        "metadata",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("sim_id", sa.Integer(), nullable=True),
        sa.Column("element", sa.String(length=250), nullable=False),
        sa.Column("value", sa.PickleType(), nullable=True),
        sa.ForeignKeyConstraint(
            ["sim_id"],
            ["simulations.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(op.f("ix_metadata_sim_id"), "metadata", ["sim_id"], unique=False)
    op.create_index("metadata_index", "metadata", ["sim_id", "element"], unique=True)

    # Copy the JSON data back row by row on every dialect.  The previous
    # PostgreSQL fast path (INSERT ... SELECT json_each_text(...) ... ::text)
    # stored raw text in the PickleType (BYTEA) column, which the legacy ORM
    # model cannot unpickle; pickling in Python keeps the restored table
    # readable by the old model on all backends.
    result = conn.execute(
        text("SELECT id, metadata FROM simulations WHERE metadata IS NOT NULL")
    )
    insert_stmt = text(
        "INSERT INTO metadata (sim_id, element, value) "
        "VALUES (:sim_id, :element, :value)"
    )
    for sim_id, metadata_json in result:
        if not metadata_json:
            continue
        try:
            # Depending on driver/dialect the JSON column may come back as a
            # string or as an already-decoded dict.
            meta_dict = (
                metadata_json
                if isinstance(metadata_json, dict)
                else json.loads(metadata_json)
            )
        except (TypeError, ValueError):
            # Malformed JSON: skip this simulation rather than abort the whole
            # downgrade (preserves the original best-effort behaviour, but no
            # longer hides insert errors behind a blanket except).
            continue
        for element, value in meta_dict.items():
            # Protocol 0 keeps the pickled payload ASCII, matching what the
            # pre-migration schema stored.
            conn.execute(
                insert_stmt,
                {
                    "sim_id": sim_id,
                    "element": element,
                    "value": pickle.dumps(value, 0),
                },
            )

    op.drop_column("simulations", "metadata")
Loading
Loading