AustralianCancerDataNetwork · gkennos · May 18, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,73 @@
+name: 'Tests'
+
+on:  # workflow triggers
+  push:
+    branches: ['main']  # pushes to main only
+  pull_request:  # no branch or path filters configured here
+
+jobs:
+  unit-and-sqlite-tests:  # runs everything not marked "postgres"; no service container
+    name: 'Unit & SQLite tests (Python ${{ matrix.python-version }})'
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # a failure on one Python version must not cancel the other
+      matrix:
+        python-version: ['3.12', '3.13']  # quoted so versions stay strings
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: 'Set up Python ${{ matrix.python-version }}'
+        uses: actions/setup-python@v5
+        with:
+          python-version: '${{ matrix.python-version }}'
+
+      - name: 'Install dependencies'
+        run: pip install -e ".[dev]"
+
+      - name: 'Run tests (excluding postgres)'
+        run: pytest -m "not postgres" -q
+
+  postgres-integration-tests:  # runs only tests marked "postgres" against a PostgreSQL 16 service
+    name: PostgreSQL integration tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # a failure on one Python version must not cancel the other
+      matrix:
+        python-version: ["3.12", "3.13"]  # quoted so versions stay strings
+
+    services:
+      postgres:  # database container started for this job
+        image: postgres:16
+        env:
+          POSTGRES_USER: test
+          POSTGRES_PASSWORD: test
+          POSTGRES_DB: test_db
+        ports:
+          - "55432:5432"  # host:container; quoted so every YAML parser reads a string
+        options: >-
+          --health-cmd "pg_isready -U test -d test_db"
+          --health-interval 2s
+          --health-timeout 5s
+          --health-retries 10
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies (including postgres extra)
+        run: pip install -e ".[dev,postgres]"
+
+      - name: Run postgres integration tests
+        run: pytest -m postgres -v
+        env:  # must agree with the service's POSTGRES_* values and mapped host port
+          PGHOST: localhost
+          PGPORT: "55432"
+          PGUSER: test
+          PGPASSWORD: test
+          PGDATABASE: test_db
+          ENGINE_CDM: postgresql+psycopg://test:test@localhost:55432/test_db
diff --git a/.gitignore b/.gitignore
@@ -52,6 +52,9 @@ RELATIONSHIP.csv
 DOMAIN.csv
 CONCEPT_ANCESTOR.csv
 CONCEPT_SYNONYM.csv
+# Allow committed test fixtures (minimal CSVs, not real Athena downloads)
+!tests/fixtures/athena_source/
+!tests/fixtures/athena_source/*.csv
 data/
 *.db-journal
 vocabulary_files/
@@ -66,4 +69,7 @@ logging/
 _temp/
 temp/
 *.dump
-*.bak
+*.bak
+notebooks/
+.dockerignore
+docker/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -91,4 +91,11 @@
 - set minimum versions per dependabot (dev and required deps)
 
 ## 0.6.2
-- capped maximum `orm-loader` version to avoid pulling in future breaking changes
+- capped maximum `orm-loader` version to avoid pulling in future breaking changes
+
+## 0.6.3
+- fix CSV quote mode for Athena vocabulary loading: switch from `literal` to `auto` to prevent quoted concept names from overflowing `VARCHAR(255)` database columns
+- make `chunksize=100_000` the default for `load-vocab-source` (was `None`/disabled); pass `--chunksize 0` to disable chunking explicitly
+- **breaking:** `load-vocab-source` CLI now defaults `--merge-strategy` to `replace` (was `upsert`) to match the Python API default and ensure retired concepts are purged on vocabulary refresh; pass `--merge-strategy upsert` to restore the previous behaviour
+- **breaking:** CLI entry point renamed from `omop-maint` to `omop-alchemy`; update any scripts or aliases accordingly (saved `.omop-maint.toml` defaults files are unaffected)
+- remove stale notebooks from repository
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
diff --git a/docker/jupyter/Dockerfile b/docker/jupyter/Dockerfile
diff --git a/docker/postgres/Dockerfile b/docker/postgres/Dockerfile
diff --git a/docker/postgres/custom.conf b/docker/postgres/custom.conf
diff --git a/docker/python/.dockerignore b/docker/python/.dockerignore
diff --git a/docker/python/Dockerfile b/docker/python/Dockerfile
diff --git a/docs/advanced/fulltext.md b/docs/advanced/fulltext.md
@@ -58,8 +58,8 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
 To enable the optional full-text sidecars in a PostgreSQL environment:
 
 ```bash
-omop-maint fulltext install
-omop-maint fulltext populate
+omop-alchemy fulltext install
+omop-alchemy fulltext populate
 ```
 
 If your running Python process should use the stored sidecar columns through ORM
@@ -164,28 +164,28 @@ This is the mode you want when:
 The maintenance CLI manages the full-text sidecars through:
 
 ```bash
-omop-maint fulltext install
-omop-maint fulltext populate
-omop-maint fulltext drop
+omop-alchemy fulltext install
+omop-alchemy fulltext populate
+omop-alchemy fulltext drop
 ```
 
 Typical workflow:
 
 ```bash
-omop-maint fulltext install
-omop-maint fulltext populate
+omop-alchemy fulltext install
+omop-alchemy fulltext populate
 ```
 
 If you later reload or update vocabulary data, refresh the stored vectors with:
 
 ```bash
-omop-maint fulltext populate
+omop-alchemy fulltext populate
 ```
 
 If you want to remove the feature completely:
 
 ```bash
-omop-maint fulltext drop
+omop-alchemy fulltext drop
 ```
 
 ---
@@ -280,7 +280,7 @@ drop lifecycle is only meaningful on PostgreSQL.
 ## Operational Gotchas
 
 - treat the sidecar columns as **derived search state**, not source-of-truth data
-- if you bulk-load new vocabulary rows, rerun `omop-maint fulltext populate`
+- if you bulk-load new vocabulary rows, rerun `omop-alchemy fulltext populate`
 - if you use `reconcile-schema`, the sidecar columns and indexes are intentional
   database additions outside the core OMOP schema
 - GIN indexes can be expensive to build on large vocabularies, so plan that as a real

diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
@@ -180,14 +180,14 @@ At the database level:
 Typical maintenance workflow:
 
 ```bash
-omop-maint fulltext install
-omop-maint fulltext populate
+omop-alchemy fulltext install
+omop-alchemy fulltext populate
 ```
 
 If you later reload vocabulary data, rerun:
 
 ```bash
-omop-maint fulltext populate
+omop-alchemy fulltext populate
 ```
 
 For the full design and query patterns, see: