From 8360d15d7accbc5e07fd4a7779b6c5ef161a2b4d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 18 Apr 2026 17:30:36 +0000 Subject: [PATCH 1/3] Migrate API to sqlite backend with MCP and compose deployment Co-authored-by: James Guillochon --- .env.example | 7 + Dockerfile.api | 19 +++ Dockerfile.ingest | 17 ++ Dockerfile.mcp | 13 ++ README.md | 41 +++++ api.py | 247 ++++++++++++------------------ classes/apidata.py | 11 ++ classes/models.py | 51 ++++++ classes/query_service.py | 53 +++++++ classes/sqlite_store.py | 148 ++++++++++++++++++ docker-compose.yml | 60 ++++++++ docs/deployment-compose.md | 51 ++++++ mcp_server.py | 97 ++++++++++++ messages.json | 2 - requirements.txt | 7 + scripts/ingest_static_catalogs.py | 164 ++++++++++++++++++++ tests/test_migration_smoke.py | 86 +++++++++++ 17 files changed, 921 insertions(+), 153 deletions(-) create mode 100644 .env.example create mode 100644 Dockerfile.api create mode 100644 Dockerfile.ingest create mode 100644 Dockerfile.mcp create mode 100644 classes/models.py create mode 100644 classes/query_service.py create mode 100644 classes/sqlite_store.py create mode 100644 docker-compose.yml create mode 100644 docs/deployment-compose.md create mode 100644 mcp_server.py create mode 100644 scripts/ingest_static_catalogs.py create mode 100644 tests/test_migration_smoke.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..cd7fc3f --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +OAC_BACKEND=sqlite +OAC_DB_PATH=/data/oacapi.db +API_PORT=5000 +API_WORKERS=1 +API_THREADS=2 +AC_PATH=./astrocats +INGEST_AC_PATH=/root/astrocats/astrocats diff --git a/Dockerfile.api b/Dockerfile.api new file mode 100644 index 0000000..8d78ef5 --- /dev/null +++ b/Dockerfile.api @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY . /app + +EXPOSE 5000 + +CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "1", "--threads", "2", "wsgi:app"] diff --git a/Dockerfile.ingest b/Dockerfile.ingest new file mode 100644 index 0000000..bb447ce --- /dev/null +++ b/Dockerfile.ingest @@ -0,0 +1,17 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY . /app + +CMD ["python", "scripts/ingest_static_catalogs.py"] diff --git a/Dockerfile.mcp b/Dockerfile.mcp new file mode 100644 index 0000000..ba58948 --- /dev/null +++ b/Dockerfile.mcp @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY . 
/app + +CMD ["python", "mcp_server.py"] diff --git a/README.md b/README.md index b7528d3..9dda435 100644 --- a/README.md +++ b/README.md @@ -136,3 +136,44 @@ https://api.astrocats.space/catalog/photometry/time+band+magnitude?ra=21:23:32.1 #### Return the instruments used to produce spectra within a 5° of a given coordinate, in CSV format https://api.astrocats.space/catalog/spectra/instrument?ra=21:23:32.16&dec=-53:01:36.08&radius=18000&format=csv + +## Modern backend and deployment + +This project now supports a modern SQLite-backed runtime that preserves the API +signature and response semantics while reducing memory pressure on constrained +nodes. The API does not interact with external services. + +### Runtime modes + +Use environment variables to choose the backend: + +- `OAC_BACKEND=sqlite` (default): query a pre-built SQLite snapshot. +- `OAC_BACKEND=legacy`: load original JSON files directly from astrocatalog + repositories. + +For SQLite mode: + +- `OAC_DB_PATH` points to the sqlite file (default `/data/oacapi.db`). +- Build the sqlite snapshot using: + +```bash +python scripts/ingest_static_catalogs.py --db-path /data/oacapi.db --ac-path /root/astrocats/astrocats +``` + +### MCP layer + +An MCP server is provided in `mcp_server.py` and exposes: + +- `query_api`: executes the same route/query semantics as the HTTP API. +- `health`: service health probe. + +### Docker Compose deployment + +Use compose to run API + MCP and optionally ingest: + +```bash +docker compose --profile ingest run --rm ingest +docker compose up --build -d api mcp +``` + +See `docs/deployment-compose.md` for full instructions. diff --git a/api.py b/api.py index 85afb02..3a5b8fe 100644 --- a/api.py +++ b/api.py @@ -8,13 +8,16 @@ from timeit import default_timer as timer import numpy as np -from astrocats.catalog.utils import is_integer, is_number, sortOD +from astrocats.catalog.utils import is_integer, is_number from astropy import units as un from astropy.coordinates import SkyCoord as coord from astropy.coordinates import concatenate as coord_concat from flask import Flask, Response, request from six import string_types -from werkzeug.contrib.fixers import ProxyFix +try: + from werkzeug.middleware.proxy_fix import ProxyFix +except Exception: + from werkzeug.contrib.fixers import ProxyFix from classes.apidata import ApiData from flask_compress import Compress @@ -122,6 +125,8 @@ def get_filename(name): def get_output_json_path(name, cat): """Get full path to output JSON file.""" + if apidata.use_sqlite: + return None return os.path.join(apidata._AC_PATH, apidata._CATS[cat][0], 'output', 'json', get_filename(name)) @@ -134,20 +139,43 @@ def bool_str(x): def load_cats(): """Reload the catalog dictionaries.""" logger.info('Loading catalog...') + apidata._catalogs = OrderedDict() + apidata._cat_keys = OrderedDict() + apidata._aliases = OrderedDict() + apidata._all_aliases = set() + + if apidata._backend == 'sqlite': + if apidata.use_sqlite: + logger.info('Loading catalog data from sqlite backend...') + apidata._catalogs, apidata._cat_keys = apidata._store.load_catalogs() + apidata._extras = OrderedDict( + (cat, OrderedDict()) for cat in apidata._catalogs) + else: + logger.warning( + 'Configured sqlite backend but database %s is missing. 
' + 'Starting with empty catalog cache.', + apidata._db_path + ) + apidata._extras = OrderedDict() + else: + for cat in apidata._CATS: + apidata._catalogs[cat] = json.load(open(os.path.join( + apidata._AC_PATH, apidata._CATS[cat][0], 'output', + apidata._CATS[cat][1]), 'r'), + object_pairs_hook=OrderedDict) + # Add some API-specific fields to each catalog. + for i, x in enumerate(apidata._catalogs[cat]): + apidata._catalogs[cat][i]['catalog'] = cat + apidata._catalogs[cat] = OrderedDict(sorted(dict( + zip([x['name'] for x in apidata._catalogs[cat]], + apidata._catalogs[cat])).items(), + key=lambda s: (s[0].upper(), s[0]))) + if cat not in apidata._extras: + apidata._extras[cat] = OrderedDict() for cat in apidata._CATS: - apidata._catalogs[cat] = json.load(open(os.path.join( - apidata._AC_PATH, apidata._CATS[cat][0], 'output', - apidata._CATS[cat][1]), 'r'), - object_pairs_hook=OrderedDict) - # Add some API-specific fields to each catalog. - for i, x in enumerate(apidata._catalogs[cat]): - apidata._catalogs[cat][i]['catalog'] = cat - apidata._catalogs[cat] = OrderedDict(sorted(dict( - zip([x['name'] for x in apidata._catalogs[cat]], - apidata._catalogs[cat])).items(), - key=lambda s: (s[0].upper(), s[0]))) - if cat not in apidata._extras: - apidata._extras[cat] = OrderedDict() + apidata._catalogs.setdefault(cat, OrderedDict()) + apidata._cat_keys.setdefault(cat, set()) + apidata._extras.setdefault(cat, OrderedDict()) logger.info('Creating alias dictionary and position arrays...') apidata._rdnames = [] @@ -156,8 +184,9 @@ def load_cats(): apidata._all = [] # Load object apidata._catalogs. - for cat in apidata._CATS: - apidata._cat_keys[cat] = set() + for cat in apidata._catalogs: + if cat not in apidata._cat_keys: + apidata._cat_keys[cat] = set() for event in apidata._catalogs[cat]: add_event(cat, event, convert_coords=False) @@ -178,8 +207,13 @@ def load_cats(): def load_atels(): """Reload the ATel dictionaries.""" # Load astronomer's telegrams. - with gzip.open(os.path.join( - '/root', 'better-atel', 'atels.json.gz'), 'rb') as f: + atel_path = os.path.join('/root', 'better-atel', 'atels.json.gz') + if not os.path.exists(atel_path): + logger.warning('ATel file %s missing, disabling ATel lookups.', atel_path) + apidata._atels = [] + apidata._atel_txts = [] + return + with gzip.open(atel_path, 'rb') as f: apidata._atels = json.loads(f.read().decode('utf-8')) apidata._atel_txts = [ (x.get('title', '') + ': ' + x.get('body', '') + ' [' + @@ -187,100 +221,6 @@ def load_atels(): for x in apidata._atels] -def handle_tns(event): - """Add a newly announced TNS event.""" - from astrocats.catalog.entry import ENTRY, Entry - import time - import urllib - - tns_name = 'Transient Name Server' - tns_url = 'https://wis-tns.weizmann.ac.il/' - # First, create the JSON file. - - if event.startswith(('AT', 'SN', 'at', 'sn')): - name = event.upper() - else: - name = 'AT' + event - - qname = replace_multiple(name.lower(), ['at', 'sn']) - - cat = 'sne' - - # Check if already in catalog, if so skip. 
- if name.lower() in apidata._all_aliases: - return False - - new_event = Entry(name=name) - - source = new_event.add_source(name=tns_name, url=tns_url) - - data = urllib.parse.urlencode({ - 'api_key': apidata._tnskey, - 'data': json.dumps({ - 'objname': qname, - 'photometry': '1' - }) - }).encode('ascii') - req = urllib.request.Request( - 'https://wis-tns.weizmann.ac.il/api/get/object', data=data) - trys = 0 - objdict = None - while trys < 3 and not objdict: - try: - objdict = json.loads(urllib.request.urlopen( - req, timeout=30).read().decode('ascii'))[ - 'data']['reply'] - except KeyboardInterrupt: - raise - except Exception: - logger.info('API request failed for `{}`.'.format(name)) - time.sleep(5) - trys = trys + 1 - - logger.info(objdict) - - if (not objdict or 'objname' not in objdict or - not isinstance(objdict['objname'], str)): - logger.info('Object `{}` not found!'.format(name)) - return False - objdict = sortOD(objdict) - - if objdict.get('ra'): - new_event.add_quantity(ENTRY.RA, str(objdict['ra']), source=source) - if objdict.get('dec'): - new_event.add_quantity(ENTRY.DEC, str(objdict['dec']), source=source) - if objdict.get('redshift'): - new_event.add_quantity( - ENTRY.REDSHIFT, str(objdict['redshift']), source=source) - if objdict.get('internal_name'): - new_event.add_quantity( - ENTRY.ALIAS, str(objdict['internal_name']), source=source) - - new_event.sanitize() - oentry = new_event._ordered(new_event) - - outfile = os.path.join( - apidata._AC_PATH, apidata._CATS[cat][0], 'output', - apidata._CATS[cat][2], name + '.json') - if not os.path.exists(outfile): - entabbed_json_dump( - {name: oentry}, open(outfile, 'w'), - separators=(',', ':')) - - # Then, load it into the API dicts. - if name not in apidata._catalogs[cat]: - apidata._catalogs[cat][name] = oentry - apidata._extras[cat][name] = oentry - - # Record the extras dictionary for debugging. 
- entabbed_json_dump(apidata._extras, open('extras.json', 'w'), - separators=(',', ':')) - - add_event(cat, name) - - return True - - def add_event(cat, event, convert_coords=True): """Add event to global arrays.""" apidata._all.append(event) @@ -381,7 +321,7 @@ def get(self, catalog_name, event_name=None, quantity_name=None, request.remote_addr, catalog_name, event_name, quantity_name, attribute_name, request.headers.get('User-Agent', '?'))) - req_vals = request.get_json() + req_vals = request.get_json(silent=True) if not req_vals: req_vals = request.values @@ -400,10 +340,6 @@ def get(self, catalog_name, event_name=None, quantity_name=None, logger.info(line) return msg('atels_reloaded') - if event_name == 'new_tns': - result = handle_tns(quantity_name) - return msg('new_tns' if result else 'failed_tns', quantity_name) - start = timer() result = self.retrieve(catalog_name, event_name, quantity_name, attribute_name, False) @@ -487,7 +423,7 @@ def retrieve_objects( qname = quantity_name aname = attribute_name - req_vals = request.get_json() + req_vals = request.get_json(silent=True) if not req_vals: req_vals = request.values @@ -727,34 +663,47 @@ def retrieve_objects( return msg('event_not_found', event) continue if full: - fpath = get_output_json_path(my_event, my_cat) - if not os.path.exists(fpath): - for opt in alopts: - fpath = None - if opt == my_event: - continue - fpath = get_output_json_path(opt, my_cat) - if os.path.exists(fpath): - logger.info( - '"{}.json" not found at expected path, ' - 'found at "{}.json" instead.'.format(my_event, opt)) - break - else: - logger.info( - '"{}.json" not found at expected path or ' - 'alternative paths [{}].' - .format(my_event, ', '.join(alopts))) - return msg('file_not_found') - - file_event = json.load( - open(fpath, 'r'), object_pairs_hook=OrderedDict) - _, file_event[my_event] = file_event.popitem() - file_event[my_event]['catalog'] = my_cat - - fcatalogs.update(file_event) - sources[my_event] = [ - x.get('bibcode', x.get('arxivid', x.get('name'))) - for x in fcatalogs[my_event].get('sources')] + if apidata.use_sqlite: + lookup_names = [my_event] + [opt[1] for opt in alopts] + resolved_name, full_event = apidata._store.get_full_event_any_alias( + my_cat, lookup_names + ) + if full_event is None: + return msg('file_not_found', my_event) + fcatalogs[my_event] = full_event + sources[my_event] = [ + x.get('bibcode', x.get('arxivid', x.get('name'))) + for x in fcatalogs[my_event].get('sources', []) + ] + else: + fpath = get_output_json_path(my_event, my_cat) + if not os.path.exists(fpath): + for opt in alopts: + fpath = None + if opt == my_event: + continue + fpath = get_output_json_path(opt, my_cat) + if os.path.exists(fpath): + logger.info( + '"{}.json" not found at expected path, ' + 'found at "{}.json" instead.'.format(my_event, opt)) + break + else: + logger.info( + '"{}.json" not found at expected path or ' + 'alternative paths [{}].' + .format(my_event, ', '.join(alopts))) + return msg('file_not_found', my_event) + + file_event = json.load( + open(fpath, 'r'), object_pairs_hook=OrderedDict) + _, file_event[my_event] = file_event.popitem() + file_event[my_event]['catalog'] = my_cat + + fcatalogs.update(file_event) + sources[my_event] = [ + x.get('bibcode', x.get('arxivid', x.get('name'))) + for x in fcatalogs[my_event].get('sources')] if qname is None: if full: edict[event] = fcatalogs.get(my_event, {}) @@ -1063,10 +1012,6 @@ def get_event_dsv( '/'.join(['', cn, en, qn, an]), '/'.join(['', cn, en, qn, an]) + '/') -# Load TNS API key. 
-with open('tns.key', 'r') as f: - apidata._tnskey = f.read().splitlines()[0] - load_cats() load_atels() diff --git a/classes/apidata.py b/classes/apidata.py index d9e899f..87f28d3 100644 --- a/classes/apidata.py +++ b/classes/apidata.py @@ -2,6 +2,8 @@ import os from collections import OrderedDict +from classes.sqlite_store import SqliteStore + class ApiData(object): """Object to store data for the OACAPI.""" @@ -24,6 +26,9 @@ class ApiData(object): def __init__(self): """Initialize.""" + self._backend = os.environ.get('OAC_BACKEND', 'sqlite').lower() + self._db_path = os.environ.get('OAC_DB_PATH', os.path.join('/data', 'oacapi.db')) + self._store = SqliteStore(self._db_path) self._coo = None self._catalogs = OrderedDict() self._cat_keys = OrderedDict() @@ -34,3 +39,9 @@ def __init__(self): self._decs = [] self._all_events = [] self._rdnames = [] + self._all = [] + + @property + def use_sqlite(self): + """Return whether sqlite backend should be used.""" + return self._backend == 'sqlite' and self._store.exists() diff --git a/classes/models.py b/classes/models.py new file mode 100644 index 0000000..1c1eb1c --- /dev/null +++ b/classes/models.py @@ -0,0 +1,51 @@ +"""SQLAlchemy models for persisted OAC API data.""" +from sqlalchemy import Column, Float, Integer, String, Text, UniqueConstraint +from sqlalchemy.orm import declarative_base + +Base = declarative_base() + + +class EventRecord(Base): + """Persisted event data from astrocatalog repositories.""" + + __tablename__ = "events" + + id = Column(Integer, primary_key=True, autoincrement=True) + catalog = Column(String(64), nullable=False, index=True) + name = Column(String(255), nullable=False, index=True) + normalized_name = Column(String(255), nullable=False, index=True) + summary_json = Column(Text, nullable=False) + full_json = Column(Text, nullable=True) + ra_deg = Column(Float, nullable=True, index=True) + dec_deg = Column(Float, nullable=True, index=True) + + __table_args__ = (UniqueConstraint("catalog", "name", name="uix_catalog_name"),) + + +class AliasRecord(Base): + """Alias lookups for event resolution.""" + + __tablename__ = "aliases" + + id = Column(Integer, primary_key=True, autoincrement=True) + catalog = Column(String(64), nullable=False, index=True) + event_name = Column(String(255), nullable=False, index=True) + alias_raw = Column(String(255), nullable=False) + alias_norm = Column(String(255), nullable=False, index=True) + + __table_args__ = ( + UniqueConstraint( + "catalog", "event_name", "alias_norm", name="uix_alias_catalog_event_norm" + ), + ) + + +class IngestMeta(Base): + """Tracks the provenance of ingested static repositories.""" + + __tablename__ = "ingest_meta" + + id = Column(Integer, primary_key=True, autoincrement=True) + key = Column(String(64), nullable=False, unique=True) + value = Column(Text, nullable=False) + diff --git a/classes/query_service.py b/classes/query_service.py new file mode 100644 index 0000000..33b87b5 --- /dev/null +++ b/classes/query_service.py @@ -0,0 +1,53 @@ +"""Service wrapper to call API logic from HTTP and MCP layers.""" +from collections import OrderedDict + +from flask import Response + + +class QueryService(object): + """Programmatic query facade preserving API behavior.""" + + def __init__(self): + # Import lazily to avoid module-import cycles during app bootstrap. 
+ from api import Catalog # pylint: disable=import-outside-toplevel + + self._resource = Catalog() + + def execute( + self, + catalog_name, + event_name=None, + quantity_name=None, + attribute_name=None, + params=None, + method="GET", + ): + """Execute API query and return structured result payload.""" + params = params or {} + from api import app # pylint: disable=import-outside-toplevel + + upper_method = method.upper() + with app.test_request_context( + method=upper_method, + path="/", + json=params if upper_method == "POST" else None, + query_string=None if upper_method == "POST" else params, + ): + result = self._resource.get( + catalog_name, event_name=event_name, quantity_name=quantity_name, attribute_name=attribute_name + ) + + if isinstance(result, Response): + return { + "kind": "response", + "status": result.status_code, + "mimetype": result.mimetype, + "body": result.get_data(as_text=True), + } + if isinstance(result, OrderedDict): + return {"kind": "json", "status": 200, "body": result} + if isinstance(result, dict): + return {"kind": "json", "status": 200, "body": result} + if isinstance(result, list): + return {"kind": "json", "status": 200, "body": result} + return {"kind": "raw", "status": 200, "body": result} diff --git a/classes/sqlite_store.py b/classes/sqlite_store.py new file mode 100644 index 0000000..7c524b2 --- /dev/null +++ b/classes/sqlite_store.py @@ -0,0 +1,148 @@ +"""SQLite-backed persistence and lookup helpers for OAC API.""" +import json +import os +from collections import OrderedDict, defaultdict + +from sqlalchemy import create_engine, select +from sqlalchemy.orm import Session + +from classes.models import AliasRecord, Base, EventRecord, IngestMeta + + +class SqliteStore(object): + """Thin SQLAlchemy wrapper for catalog event data.""" + + def __init__(self, db_path): + """Initialize an engine/session factory for a sqlite database.""" + self.db_path = db_path + self.db_url = "sqlite:///" + db_path + self.engine = create_engine(self.db_url, future=True) + + def exists(self): + """Return whether sqlite file is present on disk.""" + return os.path.exists(self.db_path) + + def create_schema(self): + """Create SQLAlchemy schema in the backing database.""" + Base.metadata.create_all(self.engine) + + def clear_all(self): + """Delete all rows from tables.""" + with Session(self.engine) as session: + session.query(AliasRecord).delete() + session.query(EventRecord).delete() + session.query(IngestMeta).delete() + session.commit() + + def upsert_meta(self, key, value): + """Insert/update ingest metadata key/value pairs.""" + with Session(self.engine) as session: + record = session.execute( + select(IngestMeta).where(IngestMeta.key == key) + ).scalar_one_or_none() + if record is None: + record = IngestMeta(key=key, value=value) + session.add(record) + else: + record.value = value + session.commit() + + def insert_events(self, rows): + """Bulk insert event rows.""" + with Session(self.engine) as session: + session.add_all([EventRecord(**row) for row in rows]) + session.commit() + + def insert_aliases(self, rows): + """Bulk insert alias rows.""" + with Session(self.engine) as session: + session.add_all([AliasRecord(**row) for row in rows]) + session.commit() + + def load_catalogs(self): + """Load summary catalog records into legacy in-memory layout.""" + catalogs = OrderedDict() + cat_keys = OrderedDict() + with Session(self.engine) as session: + records = session.execute( + select(EventRecord).order_by(EventRecord.catalog, EventRecord.name) + ).scalars() + for record in 
records: + if record.catalog not in catalogs: + catalogs[record.catalog] = OrderedDict() + cat_keys[record.catalog] = set() + summary = json.loads(record.summary_json, object_pairs_hook=OrderedDict) + summary["catalog"] = record.catalog + catalogs[record.catalog][record.name] = summary + cat_keys[record.catalog].update(summary.keys()) + return catalogs, cat_keys + + def load_all_catalog_names(self): + """Return catalog names known by the sqlite snapshot.""" + with Session(self.engine) as session: + rows = session.execute( + select(EventRecord.catalog).distinct().order_by(EventRecord.catalog) + ).all() + return [catalog for (catalog,) in rows] + + def load_alias_index(self): + """Return alias lookup structures in API-native shape.""" + aliases = OrderedDict() + all_aliases = set() + with Session(self.engine) as session: + records = session.execute( + select(AliasRecord).order_by( + AliasRecord.alias_norm, AliasRecord.catalog, AliasRecord.event_name + ) + ).scalars() + for record in records: + aliases.setdefault(record.alias_norm, []).append( + [record.catalog, record.event_name, record.alias_raw] + ) + all_aliases.add(record.alias_raw.lower()) + return aliases, all_aliases + + def get_full_event(self, catalog, event_name): + """Return full event JSON from DB if available.""" + with Session(self.engine) as session: + record = session.execute( + select(EventRecord).where( + EventRecord.catalog == catalog, EventRecord.name == event_name + ) + ).scalar_one_or_none() + if record is None: + return None + payload = record.full_json if record.full_json else record.summary_json + full_event = json.loads(payload, object_pairs_hook=OrderedDict) + full_event["catalog"] = catalog + return full_event + + def get_full_event_any_alias(self, catalog, candidate_event_names): + """Resolve first full event payload for candidate names.""" + for event_name in candidate_event_names: + event = self.get_full_event(catalog, event_name) + if event is not None: + return event_name, event + return None, None + + def coordinates(self): + """Yield event names and coordinate tuples if present.""" + rows = [] + with Session(self.engine) as session: + records = session.execute( + select(EventRecord.name, EventRecord.ra_deg, EventRecord.dec_deg) + ).all() + for name, ra_deg, dec_deg in records: + if ra_deg is None or dec_deg is None: + continue + rows.append((name, ra_deg, dec_deg)) + return rows + + def counts_by_catalog(self): + """Return row counts by catalog for diagnostics.""" + counters = defaultdict(int) + with Session(self.engine) as session: + records = session.execute(select(EventRecord.catalog)).all() + for (catalog,) in records: + counters[catalog] += 1 + return dict(sorted(counters.items())) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b0d0171 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,60 @@ +version: "3.9" + +services: + api: + build: + context: . + dockerfile: Dockerfile.api + environment: + OAC_BACKEND: ${OAC_BACKEND:-sqlite} + OAC_DB_PATH: ${OAC_DB_PATH:-/data/oacapi.db} + PYTHONUNBUFFERED: "1" + command: gunicorn --bind 0.0.0.0:5000 --workers ${API_WORKERS:-1} --threads ${API_THREADS:-2} wsgi:app + ports: + - "${API_PORT:-5000}:5000" + volumes: + - oacapi-data:/data + - ${AC_PATH:-./astrocats}:/root/astrocats/astrocats:ro + healthcheck: + test: ["CMD", "python", "-c", "from api import app; print('ok')"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + mem_limit: 640m + restart: unless-stopped + + mcp: + build: + context: . 
+ dockerfile: Dockerfile.mcp + environment: + OAC_BACKEND: ${OAC_BACKEND:-sqlite} + OAC_DB_PATH: ${OAC_DB_PATH:-/data/oacapi.db} + PYTHONUNBUFFERED: "1" + command: python mcp_server.py + volumes: + - oacapi-data:/data + - ${AC_PATH:-./astrocats}:/root/astrocats/astrocats:ro + depends_on: + - api + mem_limit: 256m + restart: unless-stopped + + ingest: + build: + context: . + dockerfile: Dockerfile.ingest + profiles: ["ingest"] + environment: + OAC_DB_PATH: ${OAC_DB_PATH:-/data/oacapi.db} + AC_PATH: ${INGEST_AC_PATH:-/root/astrocats/astrocats} + PYTHONUNBUFFERED: "1" + command: python scripts/ingest_static_catalogs.py + volumes: + - oacapi-data:/data + - ${AC_PATH:-./astrocats}:/root/astrocats/astrocats:ro + mem_limit: 768m + +volumes: + oacapi-data: diff --git a/docs/deployment-compose.md b/docs/deployment-compose.md new file mode 100644 index 0000000..ef3e891 --- /dev/null +++ b/docs/deployment-compose.md @@ -0,0 +1,51 @@ +# Docker Compose deployment + +This repository supports deploying the API and MCP server with Docker Compose. + +## Services + +- `api`: HTTP API (`wsgi:app`) with unchanged route signature. +- `mcp`: MCP layer exposing `query_api` and docs resources. +- `ingest` (profile): one-shot job to build `/data/oacapi.db` from static astrocatalog repos. + +## Prerequisites + +1. Docker and Docker Compose. +2. A local checkout of static astrocatalog repositories mounted at `./astrocats` (or set `AC_PATH`). + +## Quick start + +1. Copy environment defaults: + +```bash +cp .env.example .env +``` + +2. Build the sqlite snapshot (required for `OAC_BACKEND=sqlite`): + +```bash +docker compose --profile ingest run --rm ingest +``` + +3. Start API and MCP services: + +```bash +docker compose up --build -d api mcp +``` + +4. Verify API: + +```bash +curl "http://localhost:5000/" +``` + +## Memory notes + +Compose includes per-service memory caps intended for constrained hosts: + +- `api`: `640m` +- `mcp`: `256m` +- `ingest`: `768m` (one-shot profile) + +Tune these values in `docker-compose.yml` if your host constraints differ. + diff --git a/mcp_server.py b/mcp_server.py new file mode 100644 index 0000000..50a1efb --- /dev/null +++ b/mcp_server.py @@ -0,0 +1,97 @@ +"""MCP server exposing OAC API-compatible query tooling.""" +import json +import logging +from collections import OrderedDict + +from classes.query_service import QueryService + +try: + from fastmcp import FastMCP +except Exception as exc: # pragma: no cover - import guard for optional runtime + raise RuntimeError( + "fastmcp is required for MCP mode. Install dependencies from requirements.txt." + ) from exc + + +LOGGER = logging.getLogger("oacapi.mcp") +SERVICE = QueryService() +MCP = FastMCP("OpenAstronomyCatalogAPI") + + +def _convert_payload(value): + """Normalize ordered mappings into standard JSON-compatible structures.""" + if isinstance(value, OrderedDict): + return {k: _convert_payload(v) for k, v in value.items()} + if isinstance(value, list): + return [_convert_payload(v) for v in value] + if isinstance(value, dict): + return {k: _convert_payload(v) for k, v in value.items()} + return value + + +@MCP.tool +def query_api( + catalog_name, + event_name=None, + quantity_name=None, + attribute_name=None, + params=None, + method="GET", +): + """Execute the same query semantics as the HTTP API. + + Args: + catalog_name: Catalog route segment (e.g. `sne`, `catalog`, `all`). + event_name: Optional event segment. + quantity_name: Optional quantity segment. + attribute_name: Optional attribute segment. 
+        params: Optional query parameter dict.
+        method: HTTP-style method (`GET` or `POST`).
+    """
+    response = SERVICE.execute(
+        catalog_name=catalog_name,
+        event_name=event_name,
+        quantity_name=quantity_name,
+        attribute_name=attribute_name,
+        params=params or {},
+        method=method,
+    )
+    response["body"] = _convert_payload(response.get("body"))
+    return response
+
+
+@MCP.tool
+def health():
+    """Return basic MCP service health data."""
+    return {"status": "ok", "service": "oacapi-mcp"}
+
+
+@MCP.resource("oacapi://docs/signature")
+def signature_reference():
+    """Provide route signature guidance for clients."""
+    return (
+        "Route signature: /<catalog_name>/<event_name>/<quantity_name>/<attribute_name> "
+        "with optional URL params. Use query_api tool arguments to map segments."
+    )
+
+
+@MCP.resource("oacapi://docs/query-example")
+def query_example():
+    """Provide a representative API query example."""
+    return json.dumps(
+        {
+            "catalog_name": "sne",
+            "event_name": "SN2014J",
+            "quantity_name": "photometry",
+            "attribute_name": "magnitude+band",
+            "params": {"band": "B"},
+            "method": "GET",
+        },
+        indent=2,
+    )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    LOGGER.info("Starting MCP server via stdio transport")
+    MCP.run()
diff --git a/messages.json b/messages.json
index 65e3312..7a5dc49 100644
--- a/messages.json
+++ b/messages.json
@@ -5,13 +5,11 @@
     "cant_sort":"Unable to sort by '{}' unless attribute is included in query.",
     "cats_reloaded":"Catalogs reloaded from disk.",
     "event_not_found":"Event '{}' not found in any catalog.",
-    "failed_tns":"Event {} not added from TNS.",
     "file_not_found":"Event {} not found at its expected path, please create an issue at https://github.com/astrocatalogs/supernovae/issues.",
     "fmt_unsupported":"{} not supported for this query type.",
     "height_limited":"Height limited to {} degrees.",
     "invalid_regex":"Invalid regex '{}' specified for '{}'.",
     "max_events":"Maximum event limit ({}) exceeded.",
-    "new_tns":"Event {} added from TNS.",
     "no_atels_found":"No telegrams match your query.",
     "no_catalog_route":"The astrocats.space domain does not support 'catalog/' routes. Please use one of the specific catalog routes instead (e.g. 
api.sne.space for supernovae, api.tde.space for tidal disruptions, etc.).", "no_delimited":"This query does not support delimited output.", diff --git a/requirements.txt b/requirements.txt index fad53c2..b9ecfd7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,10 @@ astrocats numpy flask six +astropy +flask_compress +flask_restful +flask_cors +gunicorn +sqlalchemy +fastmcp diff --git a/scripts/ingest_static_catalogs.py b/scripts/ingest_static_catalogs.py new file mode 100644 index 0000000..8768153 --- /dev/null +++ b/scripts/ingest_static_catalogs.py @@ -0,0 +1,164 @@ +"""Ingest static astrocatalog repositories into a SQLite snapshot.""" +import argparse +import json +import logging +import os +import re +from collections import OrderedDict +from datetime import datetime + +from astropy.coordinates import SkyCoord as coord +from astropy import units as un + +from classes.apidata import ApiData +from classes.sqlite_store import SqliteStore + +LOGGER = logging.getLogger("ingest") + + +def normalize_alias(value): + """Normalize aliases to the form used by API lookup tables.""" + return str(value).lower().replace(" ", "") + + +def alias_variants(value): + """Generate API-compatible alias variants.""" + out = set() + lowered = str(value).lower() + out.add(lowered) + if lowered.startswith(("sn", "at")): + out.add(re.sub(r"^(sn|at)", "", lowered)) + return sorted(out) + + +def read_json(path): + """Read JSON preserving ordering to reduce output drift.""" + with open(path, "r") as handle: + return json.load(handle, object_pairs_hook=OrderedDict) + + +def parse_degrees(event): + """Try to compute decimal degrees from RA/Dec fields.""" + ra = event.get("ra") + dec = event.get("dec") + if not ra or not dec: + return None, None + ra_val = ra[0].get("value") if isinstance(ra, list) and ra else None + dec_val = dec[0].get("value") if isinstance(dec, list) and dec else None + if not ra_val or not dec_val: + return None, None + + try: + c = coord(str(ra_val), str(dec_val), unit=(un.hourangle, un.deg)) + except Exception: + try: + c = coord(float(ra_val), float(dec_val), unit=(un.deg, un.deg)) + except Exception: + return None, None + return float(c.ra.deg), float(c.dec.deg) + + +def load_full_event(apidata, catalog, event_name): + """Load full event JSON if present, otherwise return None.""" + catalog_meta = apidata._CATS[catalog] + base_dir = os.path.join(apidata._AC_PATH, catalog_meta[0], "output", "json") + primary_path = os.path.join(base_dir, event_name.replace("/", "_") + ".json") + if not os.path.exists(primary_path): + return None + payload = read_json(primary_path) + _, event = payload.popitem() + event["catalog"] = catalog + return event + + +def ingest_catalogs(apidata): + """Yield event and alias rows from all configured catalogs.""" + event_rows = [] + alias_rows = [] + for catalog, cat_meta in apidata._CATS.items(): + catalog_path = os.path.join(apidata._AC_PATH, cat_meta[0], "output", cat_meta[1]) + if not os.path.exists(catalog_path): + LOGGER.warning("Catalog file missing: %s", catalog_path) + continue + entries = read_json(catalog_path) + for entry in entries: + name = entry["name"] + summary = OrderedDict(entry) + summary["catalog"] = catalog + ra_deg, dec_deg = parse_degrees(summary) + full_event = load_full_event(apidata, catalog, name) + event_rows.append( + { + "catalog": catalog, + "name": name, + "normalized_name": normalize_alias(name), + "summary_json": json.dumps(summary, separators=(",", ":"), ensure_ascii=False), + "full_json": ( + json.dumps(full_event, 
separators=(",", ":"), ensure_ascii=False) + if full_event is not None + else None + ), + "ra_deg": ra_deg, + "dec_deg": dec_deg, + } + ) + variants = set(alias_variants(name)) + for alias in summary.get("alias", []): + alias_value = alias.get("value") + if alias_value: + variants.update(alias_variants(alias_value)) + for variant in variants: + alias_rows.append( + { + "catalog": catalog, + "event_name": name, + "alias_raw": variant, + "alias_norm": normalize_alias(variant), + } + ) + return event_rows, alias_rows + + +def parse_args(): + """Parse CLI arguments.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--db-path", + default=os.environ.get("OAC_DB_PATH", "/data/oacapi.db"), + help="Path for generated sqlite database.", + ) + parser.add_argument( + "--ac-path", + default=os.environ.get("AC_PATH", ApiData._AC_PATH), + help="Path to astrocats repositories root.", + ) + return parser.parse_args() + + +def main(): + """Entrypoint for ingest command.""" + logging.basicConfig(level=logging.INFO) + args = parse_args() + apidata = ApiData() + apidata._AC_PATH = args.ac_path + + db_dir = os.path.dirname(args.db_path) + if db_dir: + os.makedirs(db_dir, exist_ok=True) + store = SqliteStore(args.db_path) + store.create_schema() + store.clear_all() + + event_rows, alias_rows = ingest_catalogs(apidata) + LOGGER.info("Ingesting %d events and %d aliases", len(event_rows), len(alias_rows)) + if event_rows: + store.insert_events(event_rows) + if alias_rows: + store.insert_aliases(alias_rows) + store.upsert_meta("ingest_time_utc", datetime.utcnow().isoformat() + "Z") + store.upsert_meta("event_count", str(len(event_rows))) + LOGGER.info("SQLite snapshot ready at %s", args.db_path) + + +if __name__ == "__main__": + main() diff --git a/tests/test_migration_smoke.py b/tests/test_migration_smoke.py new file mode 100644 index 0000000..ace7ae1 --- /dev/null +++ b/tests/test_migration_smoke.py @@ -0,0 +1,86 @@ +"""Smoke tests for sqlite-backed API and MCP-compatible query layer.""" +import os +import tempfile +import unittest +from collections import OrderedDict + +from classes.apidata import ApiData +from classes.sqlite_store import SqliteStore + + +def _sample_summary(name, catalog): + return OrderedDict( + [ + ("name", name), + ("catalog", catalog), + ("alias", [{"value": name}]), + ("ra", [{"value": "12:00:00"}]), + ("dec", [{"value": "+02:00:00"}]), + ("redshift", [{"value": "0.1"}]), + ] + ) + + +class MigrationSmokeTests(unittest.TestCase): + """Validate compatibility plumbing around sqlite backend.""" + + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + self.db_path = os.path.join(self.tmpdir.name, "oacapi.db") + os.environ["OAC_BACKEND"] = "sqlite" + os.environ["OAC_DB_PATH"] = self.db_path + + store = SqliteStore(self.db_path) + store.create_schema() + store.insert_events( + [ + { + "catalog": "sne", + "name": "SNTEST", + "normalized_name": "sntest", + "summary_json": '{"name":"SNTEST","catalog":"sne","alias":[{"value":"SNTEST"}],"ra":[{"value":"12:00:00"}],"dec":[{"value":"+02:00:00"}],"redshift":[{"value":"0.1"}]}', + "full_json": None, + "ra_deg": 180.0, + "dec_deg": 2.0, + } + ] + ) + store.insert_aliases( + [ + { + "catalog": "sne", + "event_name": "SNTEST", + "alias_raw": "sntest", + "alias_norm": "sntest", + } + ] + ) + + def tearDown(self): + self.tmpdir.cleanup() + os.environ.pop("OAC_BACKEND", None) + os.environ.pop("OAC_DB_PATH", None) + + def test_apidata_uses_sqlite_when_present(self): + """ApiData should detect sqlite backend when DB 
exists.""" + apidata = ApiData() + self.assertTrue(apidata.use_sqlite) + catalogs, keys = apidata._store.load_catalogs() + self.assertIn("sne", catalogs) + self.assertIn("SNTEST", catalogs["sne"]) + self.assertIn("redshift", keys["sne"]) + + def test_query_service_executes(self): + """QueryService should invoke API semantics under test context.""" + # Import lazily after environment has been prepared. + from classes.query_service import QueryService + + service = QueryService() + result = service.execute("sne", event_name="SNTEST", quantity_name="redshift") + self.assertEqual(result["status"], 200) + self.assertEqual(result["kind"], "json") + self.assertIn("SNTEST", result["body"]) + + +if __name__ == "__main__": + unittest.main() From 12220337cd4438adae8491f1b353c854e0b48478 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 18 Apr 2026 18:15:45 +0000 Subject: [PATCH 2/3] Remove astrocats package dependency Co-authored-by: James Guillochon --- .travis.yml | 2 +- api.py | 2 +- classes/compat.py | 40 ++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 - 4 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 classes/compat.py diff --git a/.travis.yml b/.travis.yml index a7fdf81..d1fd6a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ before_install: install: - conda update setuptools - conda config --add channels conda-forge - - conda install --yes astrocats astropy flask numpy six + - conda install --yes astropy flask numpy six - pip install flask_compress flask_restful script: diff --git a/api.py b/api.py index 3a5b8fe..0fb7b00 100644 --- a/api.py +++ b/api.py @@ -8,7 +8,6 @@ from timeit import default_timer as timer import numpy as np -from astrocats.catalog.utils import is_integer, is_number from astropy import units as un from astropy.coordinates import SkyCoord as coord from astropy.coordinates import concatenate as coord_concat @@ -20,6 +19,7 @@ from werkzeug.contrib.fixers import ProxyFix from classes.apidata import ApiData +from classes.compat import is_integer, is_number from flask_compress import Compress from flask_restful import Api, Resource from flask_cors import CORS diff --git a/classes/compat.py b/classes/compat.py new file mode 100644 index 0000000..674b782 --- /dev/null +++ b/classes/compat.py @@ -0,0 +1,40 @@ +"""Compatibility utility helpers that replace astrocats package helpers.""" +import numbers + + +def is_integer(value): + """Return True when value can be interpreted as an integer.""" + if isinstance(value, bool): + return False + if isinstance(value, numbers.Integral): + return True + if value is None: + return False + try: + text = str(value).strip() + if text == "": + return False + if "." 
in text: + return False + int(text) + return True + except Exception: + return False + + +def is_number(value): + """Return True when value can be interpreted as a finite float.""" + if isinstance(value, bool): + return False + if isinstance(value, numbers.Real): + return True + if value is None: + return False + try: + text = str(value).strip() + if text == "": + return False + float(text) + return True + except Exception: + return False diff --git a/requirements.txt b/requirements.txt index b9ecfd7..bbc0cec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -astrocats numpy flask six From f3bc1e0b0839fca52a4427f3035fbe0957f0ca8d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 18 Apr 2026 19:16:04 +0000 Subject: [PATCH 3/3] Store event JSON paths instead of full payloads in sqlite Co-authored-by: James Guillochon --- README.md | 3 ++ api.py | 69 ++++++++++++++----------------- classes/models.py | 2 +- classes/sqlite_store.py | 30 ++++++++------ docs/deployment-compose.md | 4 ++ scripts/ingest_static_catalogs.py | 25 ++++------- tests/test_migration_smoke.py | 35 +++++++++++++++- 7 files changed, 99 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 9dda435..b7dfc29 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,9 @@ Use environment variables to choose the backend: For SQLite mode: - `OAC_DB_PATH` points to the sqlite file (default `/data/oacapi.db`). +- SQLite stores summary/index data plus event file pointers only. Full event JSON + is loaded directly from the existing catalog `output/json` files, avoiding a + second copy of large event data. - Build the sqlite snapshot using: ```bash diff --git a/api.py b/api.py index 0fb7b00..e65931d 100644 --- a/api.py +++ b/api.py @@ -126,6 +126,9 @@ def get_filename(name): def get_output_json_path(name, cat): """Get full path to output JSON file.""" if apidata.use_sqlite: + pointer = apidata._store.get_event_pointer(cat, name) + if pointer and pointer.get('event_path'): + return pointer['event_path'] return None return os.path.join(apidata._AC_PATH, apidata._CATS[cat][0], 'output', 'json', get_filename(name)) @@ -580,11 +583,7 @@ def retrieve_objects( ename = '+'.join(list(sorted(set(ename_arr)))) - if qname is None: - # Short circuit to full if keyword is present. - if full: - return self.retrieve_objects( - catalog_name, event_name=ename, full=True) + if qname is None and not full: search_all = True if catalog_name not in apidata._CATS: qname = '+'.join(list(set(sorted([ @@ -665,45 +664,39 @@ def retrieve_objects( if full: if apidata.use_sqlite: lookup_names = [my_event] + [opt[1] for opt in alopts] - resolved_name, full_event = apidata._store.get_full_event_any_alias( + _, event_pointer = apidata._store.get_event_pointer_any_alias( my_cat, lookup_names ) - if full_event is None: - return msg('file_not_found', my_event) - fcatalogs[my_event] = full_event - sources[my_event] = [ - x.get('bibcode', x.get('arxivid', x.get('name'))) - for x in fcatalogs[my_event].get('sources', []) - ] + fpath = event_pointer.get('event_path') if event_pointer else None else: fpath = get_output_json_path(my_event, my_cat) - if not os.path.exists(fpath): - for opt in alopts: - fpath = None - if opt == my_event: - continue - fpath = get_output_json_path(opt, my_cat) - if os.path.exists(fpath): - logger.info( - '"{}.json" not found at expected path, ' - 'found at "{}.json" instead.'.format(my_event, opt)) - break - else: - logger.info( - '"{}.json" not found at expected path or ' - 'alternative paths [{}].' 
- .format(my_event, ', '.join(alopts))) - return msg('file_not_found', my_event) + if not fpath or not os.path.exists(fpath): + for opt in alopts: + if opt[1] == my_event: + continue + fpath = get_output_json_path(opt[1], my_cat) + if fpath and os.path.exists(fpath): + logger.info( + '"{}.json" not found at expected path, ' + 'found at "{}.json" instead.'.format(my_event, opt[1])) + break + else: + logger.info( + '"{}.json" not found at expected path or ' + 'alternative paths [{}].' + .format(my_event, ', '.join([x[1] for x in alopts]))) + return msg('file_not_found', my_event) + with open(fpath, 'r') as event_handle: file_event = json.load( - open(fpath, 'r'), object_pairs_hook=OrderedDict) - _, file_event[my_event] = file_event.popitem() - file_event[my_event]['catalog'] = my_cat - - fcatalogs.update(file_event) - sources[my_event] = [ - x.get('bibcode', x.get('arxivid', x.get('name'))) - for x in fcatalogs[my_event].get('sources')] + event_handle, object_pairs_hook=OrderedDict) + _, file_event[my_event] = file_event.popitem() + file_event[my_event]['catalog'] = my_cat + + fcatalogs.update(file_event) + sources[my_event] = [ + x.get('bibcode', x.get('arxivid', x.get('name'))) + for x in fcatalogs[my_event].get('sources')] if qname is None: if full: edict[event] = fcatalogs.get(my_event, {}) diff --git a/classes/models.py b/classes/models.py index 1c1eb1c..34af253 100644 --- a/classes/models.py +++ b/classes/models.py @@ -15,7 +15,7 @@ class EventRecord(Base): name = Column(String(255), nullable=False, index=True) normalized_name = Column(String(255), nullable=False, index=True) summary_json = Column(Text, nullable=False) - full_json = Column(Text, nullable=True) + event_path = Column(Text, nullable=True) ra_deg = Column(Float, nullable=True, index=True) dec_deg = Column(Float, nullable=True, index=True) diff --git a/classes/sqlite_store.py b/classes/sqlite_store.py index 7c524b2..b84ad10 100644 --- a/classes/sqlite_store.py +++ b/classes/sqlite_store.py @@ -26,6 +26,11 @@ def create_schema(self): """Create SQLAlchemy schema in the backing database.""" Base.metadata.create_all(self.engine) + def reset_schema(self): + """Drop and recreate all tables for deterministic rebuilds.""" + Base.metadata.drop_all(self.engine) + Base.metadata.create_all(self.engine) + def clear_all(self): """Delete all rows from tables.""" with Session(self.engine) as session: @@ -102,8 +107,8 @@ def load_alias_index(self): all_aliases.add(record.alias_raw.lower()) return aliases, all_aliases - def get_full_event(self, catalog, event_name): - """Return full event JSON from DB if available.""" + def get_event_pointer(self, catalog, event_name): + """Return event file pointer metadata for a catalog entry.""" with Session(self.engine) as session: record = session.execute( select(EventRecord).where( @@ -112,17 +117,18 @@ def get_full_event(self, catalog, event_name): ).scalar_one_or_none() if record is None: return None - payload = record.full_json if record.full_json else record.summary_json - full_event = json.loads(payload, object_pairs_hook=OrderedDict) - full_event["catalog"] = catalog - return full_event - - def get_full_event_any_alias(self, catalog, candidate_event_names): - """Resolve first full event payload for candidate names.""" + return { + "catalog": record.catalog, + "name": record.name, + "event_path": record.event_path, + } + + def get_event_pointer_any_alias(self, catalog, candidate_event_names): + """Resolve first event file pointer for candidate names.""" for event_name in candidate_event_names: - 
event = self.get_full_event(catalog, event_name) - if event is not None: - return event_name, event + event_pointer = self.get_event_pointer(catalog, event_name) + if event_pointer is not None: + return event_name, event_pointer return None, None def coordinates(self): diff --git a/docs/deployment-compose.md b/docs/deployment-compose.md index ef3e891..2d80f02 100644 --- a/docs/deployment-compose.md +++ b/docs/deployment-compose.md @@ -27,6 +27,10 @@ cp .env.example .env docker compose --profile ingest run --rm ingest ``` +The sqlite snapshot stores summary/index metadata and file pointers only. Full +event payloads are served directly from the mounted astrocatalog JSON files, so +the deployment does not create a second large copy of event data. + 3. Start API and MCP services: ```bash diff --git a/scripts/ingest_static_catalogs.py b/scripts/ingest_static_catalogs.py index 8768153..92091a9 100644 --- a/scripts/ingest_static_catalogs.py +++ b/scripts/ingest_static_catalogs.py @@ -58,17 +58,11 @@ def parse_degrees(event): return float(c.ra.deg), float(c.dec.deg) -def load_full_event(apidata, catalog, event_name): - """Load full event JSON if present, otherwise return None.""" +def event_json_path(apidata, catalog, event_name): + """Return absolute path to full event JSON for catalog/name.""" catalog_meta = apidata._CATS[catalog] base_dir = os.path.join(apidata._AC_PATH, catalog_meta[0], "output", "json") - primary_path = os.path.join(base_dir, event_name.replace("/", "_") + ".json") - if not os.path.exists(primary_path): - return None - payload = read_json(primary_path) - _, event = payload.popitem() - event["catalog"] = catalog - return event + return os.path.join(base_dir, event_name.replace("/", "_") + ".json") def ingest_catalogs(apidata): @@ -86,18 +80,16 @@ def ingest_catalogs(apidata): summary = OrderedDict(entry) summary["catalog"] = catalog ra_deg, dec_deg = parse_degrees(summary) - full_event = load_full_event(apidata, catalog, name) + full_event_path = event_json_path(apidata, catalog, name) + if not os.path.exists(full_event_path): + full_event_path = None event_rows.append( { "catalog": catalog, "name": name, "normalized_name": normalize_alias(name), "summary_json": json.dumps(summary, separators=(",", ":"), ensure_ascii=False), - "full_json": ( - json.dumps(full_event, separators=(",", ":"), ensure_ascii=False) - if full_event is not None - else None - ), + "event_path": full_event_path, "ra_deg": ra_deg, "dec_deg": dec_deg, } @@ -146,8 +138,7 @@ def main(): if db_dir: os.makedirs(db_dir, exist_ok=True) store = SqliteStore(args.db_path) - store.create_schema() - store.clear_all() + store.reset_schema() event_rows, alias_rows = ingest_catalogs(apidata) LOGGER.info("Ingesting %d events and %d aliases", len(event_rows), len(alias_rows)) diff --git a/tests/test_migration_smoke.py b/tests/test_migration_smoke.py index ace7ae1..feb9a98 100644 --- a/tests/test_migration_smoke.py +++ b/tests/test_migration_smoke.py @@ -1,4 +1,5 @@ """Smoke tests for sqlite-backed API and MCP-compatible query layer.""" +import json import os import tempfile import unittest @@ -27,9 +28,23 @@ class MigrationSmokeTests(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.TemporaryDirectory() self.db_path = os.path.join(self.tmpdir.name, "oacapi.db") + self.ac_path = os.path.join(self.tmpdir.name, "astrocats", "astrocats") os.environ["OAC_BACKEND"] = "sqlite" os.environ["OAC_DB_PATH"] = self.db_path + event_dir = os.path.join(self.ac_path, "supernovae", "output", "json") + os.makedirs(event_dir, 
exist_ok=True) + event_payload = { + "SNTEST": { + "name": "SNTEST", + "alias": [{"value": "SNTEST"}], + "redshift": [{"value": "0.1"}], + "sources": [{"name": "UnitTestSource"}], + } + } + with open(os.path.join(event_dir, "SNTEST.json"), "w") as handle: + json.dump(event_payload, handle) + store = SqliteStore(self.db_path) store.create_schema() store.insert_events( @@ -39,7 +54,7 @@ def setUp(self): "name": "SNTEST", "normalized_name": "sntest", "summary_json": '{"name":"SNTEST","catalog":"sne","alias":[{"value":"SNTEST"}],"ra":[{"value":"12:00:00"}],"dec":[{"value":"+02:00:00"}],"redshift":[{"value":"0.1"}]}', - "full_json": None, + "event_path": os.path.join(event_dir, "SNTEST.json"), "ra_deg": 180.0, "dec_deg": 2.0, } @@ -81,6 +96,24 @@ def test_query_service_executes(self): self.assertEqual(result["kind"], "json") self.assertIn("SNTEST", result["body"]) + def test_full_query_uses_event_json_path(self): + """Full queries should load event payload from JSON file path.""" + from api import Catalog, apidata + + apidata._AC_PATH = self.ac_path + resource = Catalog() + from api import app + + with app.test_request_context( + method="GET", + path="/", + query_string={"full": "1"}, + ): + result = resource.get("sne", event_name="SNTEST") + payload = result.get("SNTEST", {}) + self.assertIn("sources", payload) + self.assertEqual(payload["sources"][0]["name"], "UnitTestSource") + if __name__ == "__main__": unittest.main()
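
A minimal client sketch for the MCP layer introduced in this series. It assumes the `fastmcp` `Client` API (the client half of the `fastmcp` dependency already listed in `requirements.txt`) and spawns `mcp_server.py` as a stdio subprocess, matching the server's default `MCP.run()` transport; the query arguments mirror the `query_example` resource:

```python
import asyncio

from fastmcp import Client


async def main():
    # Passing a .py path makes fastmcp launch the server over stdio.
    async with Client("mcp_server.py") as client:
        # Invoke the query_api tool with the same segments the HTTP API uses.
        result = await client.call_tool(
            "query_api",
            {
                "catalog_name": "sne",
                "event_name": "SN2014J",
                "quantity_name": "redshift",
            },
        )
        print(result)


asyncio.run(main())
```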