From 8fc1108f70ef345bc2f4e31b784d232c870d34e0 Mon Sep 17 00:00:00 2001 From: kraysent Date: Mon, 16 Mar 2026 22:06:24 +0000 Subject: [PATCH 1/2] basic hyperleda photometry script --- app/structured/photometry/__init__.py | 0 app/structured/photometry/upload.py | 87 +++++++++++++++++++++++++++ main.py | 31 ++++++++++ 3 files changed, 118 insertions(+) create mode 100644 app/structured/photometry/__init__.py create mode 100644 app/structured/photometry/upload.py diff --git a/app/structured/photometry/__init__.py b/app/structured/photometry/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/structured/photometry/upload.py b/app/structured/photometry/upload.py new file mode 100644 index 0000000..bbce201 --- /dev/null +++ b/app/structured/photometry/upload.py @@ -0,0 +1,87 @@ +from app import log +from app.display import print_table +from app.gen.client import adminapi +from app.gen.client.adminapi.api.default import save_structured_data +from app.gen.client.adminapi.models.save_structured_data_request import ( + SaveStructuredDataRequest, +) +from app.lib.rawdata import rawdata_batches +from app.storage import PgStorage +from app.upload import handle_call + +PHOTOMETRY_COLUMNS = ["band", "mag", "e_mag", "method"] + +BANDS = [ + ("U", "ut", "e_ut"), + ("B", "bt", "e_bt"), + ("V", "vt", "e_vt"), + ("I", "it", "e_it"), + ("K", "kt", "e_kt"), +] + +PHOTOMETRY_RAW_COLUMNS = [c for _, mag, err in BANDS for c in (mag, err)] + + +def upload_photometry_hyperleda( + storage: PgStorage, + table_name: str, + batch_size: int, + client: adminapi.AuthenticatedClient, + *, + write: bool = False, +) -> None: + uploaded_rows = 0 + uploaded_objects = 0 + skipped = 0 + + for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size): + batch_ids: list[str] = [] + batch_data: list[list[str | float]] = [] + + for row in rows: + internal_id = row["hyperleda_internal_id"] + mag_vals = [row[mag_col] for _, mag_col, _ in BANDS] + err_vals = [row[err_col] for _, _, err_col in BANDS] + if any(m is None for m in mag_vals) or any(e is None for e in err_vals): + skipped += 1 + continue + for (band, _, _), mag_val, err_val in zip(BANDS, mag_vals, err_vals, strict=True): + batch_ids.append(internal_id) + batch_data.append([band, float(mag_val), float(err_val), "asymptotic"]) + uploaded_objects += 1 + uploaded_rows += len(BANDS) + + if write and batch_ids: + handle_call( + save_structured_data.sync_detailed( + client=client, + body=SaveStructuredDataRequest( + catalog="photometry", + columns=PHOTOMETRY_COLUMNS, + ids=batch_ids, + data=batch_data, + ), + ) + ) + + log.logger.info( + "processed batch", + objects=uploaded_objects, + photometry_rows=uploaded_rows, + ) + + total = uploaded_objects + skipped + + def pct(n: int) -> float: + return (100.0 * n / total) if total else 0.0 + + table_rows: list[tuple[str, int, float | str]] = [ + ("Uploaded (objects)", uploaded_objects, pct(uploaded_objects)), + ("Uploaded (photometry rows)", uploaded_rows, "-"), + ("Skipped (null mag/error)", skipped, pct(skipped)), + ] + print_table( + ("Status", "Count", "%"), + table_rows, + title=f"Total source rows: {total}\n", + ) diff --git a/main.py b/main.py index 4ff0da8..de2133c 100644 --- a/main.py +++ b/main.py @@ -20,6 +20,9 @@ from app.structured.designations import upload_designations as run_upload_designations from app.structured.icrs import upload_icrs as run_upload_icrs from app.structured.nature import upload_nature as run_upload_nature +from app.structured.photometry.upload import ( + upload_photometry_hyperleda as run_upload_photometry_hyperleda, +) from app.structured.redshift import upload_redshift as run_upload_redshift env_map = { @@ -208,6 +211,34 @@ def upload_structured_redshift( ) +@upload_structured.command( + "photometry-hyperleda", + help="Upload U/B/V/I/K asymptotic magnitudes from hyperleda_m000 to the photometry catalog.", +) +@click.option("--batch-size", default=10000, type=int, help="Source rows per batch") +@click.option( + "--write", + is_flag=True, + help="Upload results to the API; default is to only print statistics (dry-run)", +) +@click.pass_context +def upload_structured_photometry_hyperleda( + ctx: click.Context, + batch_size: int, + write: bool, +) -> None: + common = ctx.obj.upload_structured_common + with connect(common["dsn"]) as conn: + storage = PgStorage(conn) + run_upload_photometry_hyperleda( + storage, + common["table_name"], + batch_size, + common["client"], + write=write, + ) + + @upload_structured.command("nature", help="Upload object nature/type to the structured level.") @click.option( "--column-name", From 027c282a1fa65e7f72fc5679ba258cfd7c6a3091 Mon Sep 17 00:00:00 2001 From: kraysent Date: Mon, 16 Mar 2026 22:29:49 +0000 Subject: [PATCH 2/2] fix upload function --- app/lib/rawdata.py | 2 +- app/structured/photometry/upload.py | 110 ++++++++++++++++------------ 2 files changed, 66 insertions(+), 46 deletions(-) diff --git a/app/lib/rawdata.py b/app/lib/rawdata.py index ef085e4..1591df3 100644 --- a/app/lib/rawdata.py +++ b/app/lib/rawdata.py @@ -31,7 +31,7 @@ def rawdata_batches( break total += len(rows) log.logger.debug( - "processed batch", + "read batch", rows=len(rows), last_id=rows[-1]["hyperleda_internal_id"], total=total, diff --git a/app/structured/photometry/upload.py b/app/structured/photometry/upload.py index bbce201..5bab216 100644 --- a/app/structured/photometry/upload.py +++ b/app/structured/photometry/upload.py @@ -30,58 +30,78 @@ def upload_photometry_hyperleda( *, write: bool = False, ) -> None: - uploaded_rows = 0 uploaded_objects = 0 skipped = 0 + total_source_rows = 0 + band_counts: dict[str, int] = {band: 0 for band, _, _ in BANDS} + band_mag_sums: dict[str, float] = {band: 0.0 for band, _, _ in BANDS} - for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size): - batch_ids: list[str] = [] - batch_data: list[list[str | float]] = [] + try: + for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size): + total_source_rows += len(rows) + batch_ids: list[str] = [] + batch_data: list[list[str | float]] = [] - for row in rows: - internal_id = row["hyperleda_internal_id"] - mag_vals = [row[mag_col] for _, mag_col, _ in BANDS] - err_vals = [row[err_col] for _, _, err_col in BANDS] - if any(m is None for m in mag_vals) or any(e is None for e in err_vals): - skipped += 1 - continue - for (band, _, _), mag_val, err_val in zip(BANDS, mag_vals, err_vals, strict=True): - batch_ids.append(internal_id) - batch_data.append([band, float(mag_val), float(err_val), "asymptotic"]) - uploaded_objects += 1 - uploaded_rows += len(BANDS) + for row in rows: + internal_id = row["hyperleda_internal_id"] + had_any = False + for band, mag_col, err_col in BANDS: + mag_val = row.get(mag_col) + err_val = row.get(err_col) + if mag_val is not None and err_val is not None: + batch_ids.append(internal_id) + batch_data.append([band, float(mag_val), float(err_val), "asymptotic"]) + band_counts[band] += 1 + band_mag_sums[band] += float(mag_val) + had_any = True + if had_any: + uploaded_objects += 1 + else: + skipped += 1 - if write and batch_ids: - handle_call( - save_structured_data.sync_detailed( - client=client, - body=SaveStructuredDataRequest( - catalog="photometry", - columns=PHOTOMETRY_COLUMNS, - ids=batch_ids, - data=batch_data, - ), + if write and batch_ids: + handle_call( + save_structured_data.sync_detailed( + client=client, + body=SaveStructuredDataRequest( + catalog="photometry", + columns=PHOTOMETRY_COLUMNS, + ids=batch_ids, + data=batch_data, + ), + ) ) - ) - log.logger.info( - "processed batch", - objects=uploaded_objects, - photometry_rows=uploaded_rows, - ) + uploaded_rows = sum(band_counts.values()) + log.logger.info( + "processed batch", + source_rows=len(rows), + total_source_rows=total_source_rows, + objects=uploaded_objects, + photometry_rows=uploaded_rows, + ) + finally: + total = uploaded_objects + skipped + total_photometry_rows = sum(band_counts.values()) - total = uploaded_objects + skipped + def pct(n: int, denom: int) -> float: + return (100.0 * n / denom) if denom else 0.0 - def pct(n: int) -> float: - return (100.0 * n / total) if total else 0.0 + table_rows: list[tuple[str | int | float, ...]] = [ + ("Source rows with ≥1 band", uploaded_objects, f"{pct(uploaded_objects, total):.1f}%", "-"), + ("Source rows with no band", skipped, f"{pct(skipped, total):.1f}%", "-"), + ("Total photometry rows", total_photometry_rows, "-", "-"), + ] + for band, _, _ in BANDS: + count = band_counts[band] + avg_mag = (band_mag_sums[band] / count) if count else 0.0 + pct_str = f"{pct(count, total_photometry_rows):.1f}%" if total_photometry_rows else "-" + avg_str = round(avg_mag, 3) if count else "-" + table_rows.append((band, count, pct_str, avg_str)) - table_rows: list[tuple[str, int, float | str]] = [ - ("Uploaded (objects)", uploaded_objects, pct(uploaded_objects)), - ("Uploaded (photometry rows)", uploaded_rows, "-"), - ("Skipped (null mag/error)", skipped, pct(skipped)), - ] - print_table( - ("Status", "Count", "%"), - table_rows, - title=f"Total source rows: {total}\n", - ) + print_table( + ("Status", "Uploaded", "% of total", "Avg mag"), + table_rows, + title=f"Total source rows: {total}\n", + percent_last_column=False, + )