From 8fc1108f70ef345bc2f4e31b784d232c870d34e0 Mon Sep 17 00:00:00 2001
From: kraysent <kraysent@gmail.com>
Date: Mon, 16 Mar 2026 22:06:24 +0000
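Subject: [PATCH 1/2] Add basic HyperLEDA photometry upload command

Add app/structured/photometry/upload.py, which reads the U/B/V/I/K
magnitude and error columns (ut/e_ut, bt/e_bt, vt/e_vt, it/e_it,
kt/e_kt) from a raw table in batches and saves them to the "photometry"
catalog with method "asymptotic". A source row is skipped if any of its
magnitudes or errors is null.

Expose it as the "photometry-hyperleda" subcommand in main.py. By
default the command only prints statistics (dry run); pass --write to
send the data to the API.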
---
 app/structured/photometry/__init__.py |  0
 app/structured/photometry/upload.py   | 87 +++++++++++++++++++++++++++
 main.py                               | 31 ++++++++++
 3 files changed, 118 insertions(+)
 create mode 100644 app/structured/photometry/__init__.py
 create mode 100644 app/structured/photometry/upload.py

diff --git a/app/structured/photometry/__init__.py b/app/structured/photometry/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/structured/photometry/upload.py b/app/structured/photometry/upload.py
new file mode 100644
index 0000000..bbce201
--- /dev/null
+++ b/app/structured/photometry/upload.py
@@ -0,0 +1,87 @@
+from app import log
+from app.display import print_table
+from app.gen.client import adminapi
+from app.gen.client.adminapi.api.default import save_structured_data
+from app.gen.client.adminapi.models.save_structured_data_request import (
+    SaveStructuredDataRequest,
+)
+from app.lib.rawdata import rawdata_batches
+from app.storage import PgStorage
+from app.upload import handle_call
+
+PHOTOMETRY_COLUMNS = ["band", "mag", "e_mag", "method"]
+
+BANDS = [
+    ("U", "ut", "e_ut"),
+    ("B", "bt", "e_bt"),
+    ("V", "vt", "e_vt"),
+    ("I", "it", "e_it"),
+    ("K", "kt", "e_kt"),
+]
+
+PHOTOMETRY_RAW_COLUMNS = [c for _, mag, err in BANDS for c in (mag, err)]
+
+
+def upload_photometry_hyperleda(
+    storage: PgStorage,
+    table_name: str,
+    batch_size: int,
+    client: adminapi.AuthenticatedClient,
+    *,
+    write: bool = False,
+) -> None:
+    uploaded_rows = 0
+    uploaded_objects = 0
+    skipped = 0
+
+    for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size):
+        batch_ids: list[str] = []
+        batch_data: list[list[str | float]] = []
+
+        for row in rows:
+            internal_id = row["hyperleda_internal_id"]
+            mag_vals = [row[mag_col] for _, mag_col, _ in BANDS]
+            err_vals = [row[err_col] for _, _, err_col in BANDS]
+            if any(m is None for m in mag_vals) or any(e is None for e in err_vals):
+                skipped += 1
+                continue
+            for (band, _, _), mag_val, err_val in zip(BANDS, mag_vals, err_vals, strict=True):
+                batch_ids.append(internal_id)
+                batch_data.append([band, float(mag_val), float(err_val), "asymptotic"])
+            uploaded_objects += 1
+            uploaded_rows += len(BANDS)
+
+        if write and batch_ids:
+            handle_call(
+                save_structured_data.sync_detailed(
+                    client=client,
+                    body=SaveStructuredDataRequest(
+                        catalog="photometry",
+                        columns=PHOTOMETRY_COLUMNS,
+                        ids=batch_ids,
+                        data=batch_data,
+                    ),
+                )
+            )
+
+        log.logger.info(
+            "processed batch",
+            objects=uploaded_objects,
+            photometry_rows=uploaded_rows,
+        )
+
+    total = uploaded_objects + skipped
+
+    def pct(n: int) -> float:
+        return (100.0 * n / total) if total else 0.0
+
+    table_rows: list[tuple[str, int, float | str]] = [
+        ("Uploaded (objects)", uploaded_objects, pct(uploaded_objects)),
+        ("Uploaded (photometry rows)", uploaded_rows, "-"),
+        ("Skipped (null mag/error)", skipped, pct(skipped)),
+    ]
+    print_table(
+        ("Status", "Count", "%"),
+        table_rows,
+        title=f"Total source rows: {total}\n",
+    )
diff --git a/main.py b/main.py
index 4ff0da8..de2133c 100644
--- a/main.py
+++ b/main.py
@@ -20,6 +20,9 @@
 from app.structured.designations import upload_designations as run_upload_designations
 from app.structured.icrs import upload_icrs as run_upload_icrs
 from app.structured.nature import upload_nature as run_upload_nature
+from app.structured.photometry.upload import (
+    upload_photometry_hyperleda as run_upload_photometry_hyperleda,
+)
 from app.structured.redshift import upload_redshift as run_upload_redshift
 
 env_map = {
@@ -208,6 +211,34 @@ def upload_structured_redshift(
         )
 
 
+@upload_structured.command(
+    "photometry-hyperleda",
+    help="Upload U/B/V/I/K asymptotic magnitudes from hyperleda_m000 to the photometry catalog.",
+)
+@click.option("--batch-size", default=10000, type=int, help="Source rows per batch")
+@click.option(
+    "--write",
+    is_flag=True,
+    help="Upload results to the API; default is to only print statistics (dry-run)",
+)
+@click.pass_context
+def upload_structured_photometry_hyperleda(
+    ctx: click.Context,
+    batch_size: int,
+    write: bool,
+) -> None:
+    common = ctx.obj.upload_structured_common
+    with connect(common["dsn"]) as conn:
+        storage = PgStorage(conn)
+        run_upload_photometry_hyperleda(
+            storage,
+            common["table_name"],
+            batch_size,
+            common["client"],
+            write=write,
+        )
+
+
 @upload_structured.command("nature", help="Upload object nature/type to the structured level.")
 @click.option(
     "--column-name",

From 027c282a1fa65e7f72fc5679ba258cfd7c6a3091 Mon Sep 17 00:00:00 2001
From: kraysent <kraysent@gmail.com>
Date: Mon, 16 Mar 2026 22:29:49 +0000
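Subject: [PATCH 2/2] photometry: upload bands independently, add per-band stats

Previously a source row was skipped unless all five bands had both a
magnitude and an error. Now every band with a non-null magnitude and
error is uploaded on its own, and a row is counted as skipped only when
no band qualifies.

Also:
- track per-band counts and magnitude sums, and show count, share and
  average magnitude for each band in the summary table;
- build and print the summary table in a `finally` block so it is shown
  even when the upload loop raises;
- rename the rawdata_batches debug message from "processed batch" to
  "read batch"; it was identical to the message logged by the upload
  loop.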
---
 app/lib/rawdata.py                  |   2 +-
 app/structured/photometry/upload.py | 110 ++++++++++++++++------------
 2 files changed, 66 insertions(+), 46 deletions(-)

diff --git a/app/lib/rawdata.py b/app/lib/rawdata.py
index ef085e4..1591df3 100644
--- a/app/lib/rawdata.py
+++ b/app/lib/rawdata.py
@@ -31,7 +31,7 @@ def rawdata_batches(
             break
         total += len(rows)
         log.logger.debug(
-            "processed batch",
+            "read batch",
             rows=len(rows),
             last_id=rows[-1]["hyperleda_internal_id"],
             total=total,
diff --git a/app/structured/photometry/upload.py b/app/structured/photometry/upload.py
index bbce201..5bab216 100644
--- a/app/structured/photometry/upload.py
+++ b/app/structured/photometry/upload.py
@@ -30,58 +30,78 @@ def upload_photometry_hyperleda(
     *,
     write: bool = False,
 ) -> None:
-    uploaded_rows = 0
     uploaded_objects = 0
     skipped = 0
+    total_source_rows = 0
+    band_counts: dict[str, int] = {band: 0 for band, _, _ in BANDS}
+    band_mag_sums: dict[str, float] = {band: 0.0 for band, _, _ in BANDS}
 
-    for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size):
-        batch_ids: list[str] = []
-        batch_data: list[list[str | float]] = []
+    try:
+        for rows in rawdata_batches(storage, table_name, PHOTOMETRY_RAW_COLUMNS, batch_size):
+            total_source_rows += len(rows)
+            batch_ids: list[str] = []
+            batch_data: list[list[str | float]] = []
 
-        for row in rows:
-            internal_id = row["hyperleda_internal_id"]
-            mag_vals = [row[mag_col] for _, mag_col, _ in BANDS]
-            err_vals = [row[err_col] for _, _, err_col in BANDS]
-            if any(m is None for m in mag_vals) or any(e is None for e in err_vals):
-                skipped += 1
-                continue
-            for (band, _, _), mag_val, err_val in zip(BANDS, mag_vals, err_vals, strict=True):
-                batch_ids.append(internal_id)
-                batch_data.append([band, float(mag_val), float(err_val), "asymptotic"])
-            uploaded_objects += 1
-            uploaded_rows += len(BANDS)
+            for row in rows:
+                internal_id = row["hyperleda_internal_id"]
+                had_any = False
+                for band, mag_col, err_col in BANDS:
+                    mag_val = row.get(mag_col)
+                    err_val = row.get(err_col)
+                    if mag_val is not None and err_val is not None:
+                        batch_ids.append(internal_id)
+                        batch_data.append([band, float(mag_val), float(err_val), "asymptotic"])
+                        band_counts[band] += 1
+                        band_mag_sums[band] += float(mag_val)
+                        had_any = True
+                if had_any:
+                    uploaded_objects += 1
+                else:
+                    skipped += 1
 
-        if write and batch_ids:
-            handle_call(
-                save_structured_data.sync_detailed(
-                    client=client,
-                    body=SaveStructuredDataRequest(
-                        catalog="photometry",
-                        columns=PHOTOMETRY_COLUMNS,
-                        ids=batch_ids,
-                        data=batch_data,
-                    ),
+            if write and batch_ids:
+                handle_call(
+                    save_structured_data.sync_detailed(
+                        client=client,
+                        body=SaveStructuredDataRequest(
+                            catalog="photometry",
+                            columns=PHOTOMETRY_COLUMNS,
+                            ids=batch_ids,
+                            data=batch_data,
+                        ),
+                    )
                 )
-            )
 
-        log.logger.info(
-            "processed batch",
-            objects=uploaded_objects,
-            photometry_rows=uploaded_rows,
-        )
+            uploaded_rows = sum(band_counts.values())
+            log.logger.info(
+                "processed batch",
+                source_rows=len(rows),
+                total_source_rows=total_source_rows,
+                objects=uploaded_objects,
+                photometry_rows=uploaded_rows,
+            )
+    finally:
+        total = uploaded_objects + skipped
+        total_photometry_rows = sum(band_counts.values())
 
-    total = uploaded_objects + skipped
+        def pct(n: int, denom: int) -> float:
+            return (100.0 * n / denom) if denom else 0.0
 
-    def pct(n: int) -> float:
-        return (100.0 * n / total) if total else 0.0
+        table_rows: list[tuple[str | int | float, ...]] = [
+            ("Source rows with ≥1 band", uploaded_objects, f"{pct(uploaded_objects, total):.1f}%", "-"),
+            ("Source rows with no band", skipped, f"{pct(skipped, total):.1f}%", "-"),
+            ("Total photometry rows", total_photometry_rows, "-", "-"),
+        ]
+        for band, _, _ in BANDS:
+            count = band_counts[band]
+            avg_mag = (band_mag_sums[band] / count) if count else 0.0
+            pct_str = f"{pct(count, total_photometry_rows):.1f}%" if total_photometry_rows else "-"
+            avg_str = round(avg_mag, 3) if count else "-"
+            table_rows.append((band, count, pct_str, avg_str))
 
-    table_rows: list[tuple[str, int, float | str]] = [
-        ("Uploaded (objects)", uploaded_objects, pct(uploaded_objects)),
-        ("Uploaded (photometry rows)", uploaded_rows, "-"),
-        ("Skipped (null mag/error)", skipped, pct(skipped)),
-    ]
-    print_table(
-        ("Status", "Count", "%"),
-        table_rows,
-        title=f"Total source rows: {total}\n",
-    )
+        print_table(
+            ("Status", "Uploaded", "% of total", "Avg mag"),
+            table_rows,
+            title=f"Total source rows: {total}\n",
+            percent_last_column=False,
+        )