From 90d506080f9a477d8c3a3482c5ec6f241a68491e Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 18 Mar 2026 18:07:17 +0000 Subject: [PATCH 01/11] Removed API endpoints related to registering unneeded CLEM tables --- src/murfey/server/api/clem.py | 736 +--------------------------- src/murfey/util/route_manifest.yaml | 41 +- 2 files changed, 14 insertions(+), 763 deletions(-) diff --git a/src/murfey/server/api/clem.py b/src/murfey/server/api/clem.py index df484e7f1..d094b287a 100644 --- a/src/murfey/server/api/clem.py +++ b/src/murfey/server/api/clem.py @@ -1,7 +1,5 @@ from __future__ import annotations -import re -import traceback from ast import literal_eval from importlib.metadata import ( EntryPoint, # type hinting only @@ -9,25 +7,15 @@ ) from logging import getLogger from pathlib import Path -from typing import Literal, Optional, Type, Union +from typing import Literal, Optional from fastapi import APIRouter from pydantic import BaseModel, field_validator -from sqlalchemy.exc import NoResultFound from sqlmodel import Session, select +import murfey.util.db as MurfeyDB from murfey.server import _transport_object from murfey.server.murfey_db import murfey_db -from murfey.util import sanitise -from murfey.util.config import get_machine_config -from murfey.util.db import ( - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, - Session as MurfeySession, -) # Set up logger logger = getLogger("murfey.server.api.clem") @@ -38,714 +26,12 @@ tags=["Workflows: CLEM"], ) -# Valid file types -valid_file_types = ( - ".lif", - ".tif", - ".tiff", - ".xlif", - ".xml", -) - - -""" -HELPER FUNCTIONS -""" - - -def validate_and_sanitise( - file: Path, - session_id: int, - db: Session, -) -> Path: - """ - Performs validation and sanitisation on the incoming file paths, ensuring that - no forbidden characters are present and that the the path points only to allowed - sections of the file server. 
- - Returns the file path as a sanitised string that can be converted into a Path - object again. - - NOTE: Due to the instrument name query, 'db' now needs to be passed as an - explicit variable to this function from within a FastAPI endpoint, as using the - instance that was imported directly won't load it in the correct state. - """ - - # Resolve symlinks and directory changes to get full file path - full_path = Path(file).resolve() - - # Use machine configuration to validate which file base paths are accepted from - instrument_name = ( - db.exec(select(MurfeySession).where(MurfeySession.id == session_id)) - .one() - .instrument_name - ) - machine_config = get_machine_config(instrument_name=instrument_name)[ - instrument_name - ] - rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() - - # Check that full file path doesn't contain unallowed characters - # Currently allows only: - # - words (alphanumerics and "_"; \w), - # - spaces (\s), - # - periods, - # - dashes, - # - forward slashes ("/") - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path))) is False: - raise ValueError(f"Unallowed characters present in {file}") - - # Check that it's not accessing somehwere it's not allowed - if not str(full_path).startswith(str(rsync_basepath)): - raise ValueError(f"{file} points to a directory that is not permitted") - - # Check that it is of a permitted file type - if f"{full_path.suffix}" not in valid_file_types: - raise ValueError(f"{full_path.suffix} is not a permitted file format") - - return full_path - - -def get_db_entry( - db: Session, - # With the database search funcion having been moved out of the FastAPI - # endpoint, the database now has to be explicitly passed within the FastAPI - # endpoint function in order for it to be loaded in the correct state. 
- table: Type[ - Union[ - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, - ] - ], - session_id: int, - file_path: Optional[Path] = None, - series_name: Optional[str] = None, -) -> Union[ - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, -]: - """ - Searches the CLEM workflow-related tables in the Murfey database for an entry that - matches the file path or series name within a given session. Returns the entry if - a match is found, otherwise register it as a new entry in the database. - """ - - # Validate that parameters are provided correctly - if file_path is None and series_name is None: - raise ValueError( - "One of either 'file_path' or 'series_name' has to be provided" - ) - if file_path is not None and series_name is not None: - raise ValueError("Only one of 'file_path' or 'series_name' should be provided") - - # Validate file path if provided - if file_path is not None: - try: - file_path = validate_and_sanitise(file_path, session_id, db) - except Exception: - raise Exception - - # Validate series name to use - if series_name is not None: - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", series_name)) is False: - raise ValueError("One or more characters in the string are not permitted") - - # Return database entry if it exists - try: - db_entry = ( - db.exec( - select(table) - .where(table.session_id == session_id) - .where(table.file_path == str(file_path)) - ).one() - if file_path is not None - else db.exec( - select(table) - .where(table.session_id == session_id) - .where(table.series_name == series_name) - ).one() - ) - # Create and register new entry if not present - except NoResultFound: - db_entry = ( - table( - file_path=str(file_path), - session_id=session_id, - ) - if file_path is not None - else table( - series_name=series_name, - session_id=session_id, - ) - ) - db.add(db_entry) - db.commit() - - except Exception: - raise Exception - - return db_entry - - -""" -API ENDPOINTS 
FOR FILE REGISTRATION -""" - class LifInfo(BaseModel): lif_file: Path - master_metadata: Optional[Path] = None - child_metadata: list[Path] = [] - child_series: list[str] = [] - child_stacks: list[Path] = [] - - -@router.post("/sessions/{session_id}/clem/lif_files") -def register_lif_file( - lif_file: LifInfo, - session_id: int, - db: Session = murfey_db, -): - # Return or register the LIF file entry - try: - clem_lif_file: CLEMLIFFile = get_db_entry( - db=db, - table=CLEMLIFFile, - session_id=session_id, - file_path=lif_file.lif_file, - ) - except Exception: - logger.error( - "Exception encountered while registering " - f"LIF file {sanitise(str(lif_file))!r}: \n" - f"{traceback.format_exc()}" - ) - return False - - # Add metadata information if provided - if lif_file.master_metadata is not None: - try: - master_metadata = validate_and_sanitise( - lif_file.master_metadata, session_id, db - ) - clem_lif_file.master_metadata = str(master_metadata) - except Exception: - logger.warning( - "Unable to add master metadata information to database entry for " - f"LIF file {sanitise(str(lif_file))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register child metadata if provided - for metadata in lif_file.child_metadata: - try: - metadata_db_entry: CLEMImageMetadata = get_db_entry( - db=db, - table=CLEMImageMetadata, - session_id=session_id, - file_path=metadata, - ) - # Append to database entry - clem_lif_file.child_metadata.append(metadata_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"metadata file {sanitise(str(metadata))!r} in association with " - f"LIF file {sanitise(str(lif_file))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Register child image series if provided - for series in lif_file.child_series: - try: - series_db_entry: CLEMImageSeries = get_db_entry( - db=db, - table=CLEMImageSeries, - session_id=session_id, - series_name=series, - ) - # Append to database entry - clem_lif_file.child_series.append(series_db_entry) - 
except Exception: - logger.warning( - "Unable to register " - f"metadata file {sanitise(series)!r} in association with " - f"LIF file {sanitise(str(lif_file))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Register child image stacks if provided - for stack in lif_file.child_stacks: - try: - stack_db_entry: CLEMImageStack = get_db_entry( - db=db, - table=CLEMImageStack, - session_id=session_id, - file_path=stack, - ) - # Append to database entry - clem_lif_file.child_stacks.append(stack_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"image stack {sanitise(str(stack))!r} in association with " - f"LIF file {sanitise(str(lif_file))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Commit to database - db.add(clem_lif_file) - db.commit() - db.close() - return True - - -class TiffInfo(BaseModel): - tiff_file: Path - associated_metadata: Optional[Path] = None - associated_series: Optional[str] = None - associated_stack: Optional[Path] = None - - -@router.post("/sessions/{session_id}/clem/tiff_files") -def register_tiff_file( - tiff_file: TiffInfo, - session_id: int, - db: Session = murfey_db, -): - # Get or register the database entry - try: - clem_tiff_file: CLEMTIFFFile = get_db_entry( - db=db, - table=CLEMTIFFFile, - session_id=session_id, - file_path=tiff_file.tiff_file, - ) - except Exception: - logger.error( - "Exception encountered while registering " - f"TIFF file {sanitise(str(tiff_file))!r}: \n" - f"{traceback.format_exc()}" - ) - return False - - # Add metadata if provided - if tiff_file.associated_metadata is not None: - try: - metadata_db_entry: CLEMImageMetadata = get_db_entry( - db=db, - table=CLEMImageMetadata, - session_id=session_id, - file_path=tiff_file.associated_metadata, - ) - # Link database entries - clem_tiff_file.associated_metadata = metadata_db_entry - except Exception: - logger.warning( - "Unable to register " - f"metadata file {sanitise(str(tiff_file.associated_metadata))!r} in association 
with " - f"TIFF file {sanitise(str(tiff_file))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Add series information if provided - if tiff_file.associated_series is not None: - try: - series_db_entry: CLEMImageSeries = get_db_entry( - db=db, - table=CLEMImageSeries, - session_id=session_id, - series_name=tiff_file.associated_series, - ) - # Link database entries - clem_tiff_file.child_series = series_db_entry - except Exception: - logger.warning( - "Unable to register " - f"CLEM series {sanitise(tiff_file.associated_series)!r} in association with " - f"TIFF file {sanitise(str(tiff_file))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Add image stack information if provided - if tiff_file.associated_stack is not None: - try: - stack_db_entry: CLEMImageStack = get_db_entry( - db=db, - table=CLEMImageStack, - session_id=session_id, - file_path=tiff_file.associated_stack, - ) - # Link database entries - clem_tiff_file.child_stack = stack_db_entry - except Exception: - logger.warning( - "Unable to register " - f"image stack {sanitise(str(tiff_file.associated_stack))!r} in association with " - f"{traceback.format_exc()}" - ) - - # Commit to database - db.add(clem_tiff_file) - db.commit() - db.close() - return True - - -@router.post("/sessions/{session_id}/clem/metadata_files") -def register_clem_metadata( - metadata_file: Path, - session_id: int, - parent_lif: Optional[Path] = None, - associated_tiffs: list[Path] = [], - associated_series: Optional[str] = None, - associated_stacks: list[Path] = [], - db: Session = murfey_db, -): - # Return database entry if it already exists - try: - clem_metadata: CLEMImageMetadata = get_db_entry( - db=db, - table=CLEMImageMetadata, - session_id=session_id, - file_path=metadata_file, - ) - except Exception: - logger.error( - "Exception encountered while registering" - f"metadata file {sanitise(str(metadata_file))!r}" - f"{traceback.format_exc()}" - ) - return False - - # Register a parent LIF file if provided - if parent_lif is not 
None: - try: - lif_db_entry: CLEMLIFFile = get_db_entry( - db=db, - table=CLEMLIFFile, - session_id=session_id, - file_path=parent_lif, - ) - # Link database entries - clem_metadata.parent_lif = lif_db_entry - except Exception: - logger.warning( - "Unable to register " - f"LIF file {sanitise(str(parent_lif))!r} in association with " - f"metadata file {sanitise(str(metadata_file))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register associated TIFF files if provided - for tiff in associated_tiffs: - try: - tiff_db_entry: CLEMTIFFFile = get_db_entry( - db=db, - table=CLEMTIFFFile, - session_id=session_id, - file_path=tiff, - ) - # Append entry - clem_metadata.associated_tiffs.append(tiff_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"TIFF file {sanitise(str(tiff))!r} in association with " - f"metadata file {sanitise(str(metadata_file))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Register associated image series if provided - if associated_series is not None: - try: - series_db_entry: CLEMImageSeries = get_db_entry( - db=db, - table=CLEMImageSeries, - session_id=session_id, - series_name=associated_series, - ) - # The link can only be made from series-side; not sure why - series_db_entry.associated_metadata = clem_metadata - db.add(series_db_entry) - db.commit() - except Exception: - logger.warning( - "Unable to register " - f"CLEM series {sanitise(associated_series)!r} in association with " - f"metadata file {sanitise(str(metadata_file))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register associated image stacks if provided - for stack in associated_stacks: - try: - stack_db_entry: CLEMImageStack = get_db_entry( - db=db, - table=CLEMImageStack, - session_id=session_id, - file_path=stack, - ) - clem_metadata.associated_stacks.append(stack_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"image stack {sanitise(str(stack))!r} in association with " - f"metadata file 
{sanitise(str(metadata_file))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Commit to database - db.add(clem_metadata) - db.commit() - db.close() - return True - - -@router.post("/sessions/{session_id}/clem/image_series") -def register_image_series( - series_name: str, - session_id: int, - parent_lif: Optional[Path] = None, - parent_tiffs: list[Path] = [], - associated_metadata: Optional[Path] = None, - child_stacks: list[Path] = [], - db: Session = murfey_db, -): - # Get or register series - try: - clem_image_series: CLEMImageSeries = get_db_entry( - db=db, - table=CLEMImageSeries, - session_id=session_id, - series_name=series_name, - ) - except Exception: - logger.error( - "Exception encountered while registering " - f"CLEM series {sanitise(series_name)!r}: \n" - f"{traceback.format_exc()}" - ) - return False - - # Register parent LIF file if provided - if parent_lif is not None: - try: - lif_db_entry: CLEMLIFFile = get_db_entry( - db=db, - table=CLEMLIFFile, - session_id=session_id, - file_path=parent_lif, - ) - # Link entries - clem_image_series.parent_lif = lif_db_entry - except Exception: - logger.warning( - "Unable to register " - f"LIF file {sanitise(str(parent_lif))!r} in association with " - f"CLEM series {sanitise(series_name)!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register parent TIFFs if provided - for tiff in parent_tiffs: - try: - tiff_db_entry: CLEMTIFFFile = get_db_entry( - db=db, - table=CLEMTIFFFile, - session_id=session_id, - file_path=tiff, - ) - # Append entry - clem_image_series.parent_tiffs.append(tiff_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"TIFF file {sanitise(str(tiff))!r} in association with " - f"CLEM series {sanitise(series_name)!r}: \n" - f"{traceback.format_exc()}" - ) - continue # Try next item in loop - - # Register associated metadata if provided - if associated_metadata is not None: - try: - metadata_db_entry: CLEMImageMetadata = get_db_entry( - db=db, - 
table=CLEMImageMetadata, - session_id=session_id, - file_path=associated_metadata, - ) - # Link entries - clem_image_series.associated_metadata = metadata_db_entry - except Exception: - logger.warning( - "Unable to register " - f"metadata file {sanitise(str(associated_metadata))!r} in association with " - f"CLEM series {sanitise(series_name)!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register child image stacks if provided - for stack in child_stacks: - try: - stack_db_entry: CLEMImageStack = get_db_entry( - db=db, - table=CLEMImageStack, - session_id=session_id, - file_path=stack, - ) - # Append entry - clem_image_series.child_stacks.append(stack_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"image stack {sanitise(str(stack))!r} in association with " - f"CLEM series {sanitise(series_name)!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Register - db.add(clem_image_series) - db.commit() - db.close() - return True - - -@router.post("/sessions/{session_id}/clem/image_stacks") -def register_image_stack( - image_stack: Path, - session_id: int, - channel: Optional[str] = None, - parent_lif: Optional[Path] = None, - parent_tiffs: list[Path] = [], - associated_metadata: Optional[Path] = None, - parent_series: Optional[str] = None, - db: Session = murfey_db, -): - # Get or register image stack entry - try: - clem_image_stack: CLEMImageStack = get_db_entry( - db=db, - table=CLEMImageStack, - session_id=session_id, - file_path=image_stack, - ) - except Exception: - logger.error( - "Exception encountered while registering " - f"image stack {sanitise(str(image_stack))!r}: \n" - f"{traceback.format_exc()}" - ) - return False - - # Register channel name if provided - if channel is not None: - clem_image_stack.channel_name = channel - - # Register parent LIF file if provided - if parent_lif is not None: - try: - lif_db_entry: CLEMLIFFile = get_db_entry( - db=db, - table=CLEMLIFFile, - session_id=session_id, - file_path=parent_lif, - ) 
- clem_image_stack.parent_lif = lif_db_entry - except Exception: - logger.warning( - "Unable to register " - f"LIF file {sanitise(str(parent_lif))!r} in association with " - f"image stack {sanitise(str(image_stack))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register parent TIFF files if provided - for tiff in parent_tiffs: - try: - tiff_db_entry: CLEMTIFFFile = get_db_entry( - db=db, - table=CLEMTIFFFile, - session_id=session_id, - file_path=tiff, - ) - # Append entry - clem_image_stack.parent_tiffs.append(tiff_db_entry) - except Exception: - logger.warning( - "Unable to register " - f"TIFF file {sanitise(str(tiff))!r} in association with " - f"image stack {sanitise(str(image_stack))!r}: \n" - f"{traceback.format_exc()}" - ) - continue - - # Register associated metadata if provided - if associated_metadata is not None: - try: - metadata_db_entry: CLEMImageMetadata = get_db_entry( - db=db, - table=CLEMImageMetadata, - session_id=session_id, - file_path=associated_metadata, - ) - # Link entries - clem_image_stack.associated_metadata = metadata_db_entry - except Exception: - logger.warning( - "Unable to register " - f"metadata file {sanitise(str(associated_metadata))!r} in association with " - f"image stack {sanitise(str(image_stack))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register parent series if provided - if parent_series is not None: - try: - series_db_entry: CLEMImageSeries = get_db_entry( - db=db, - table=CLEMImageSeries, - session_id=session_id, - series_name=parent_series, - ) - # Link entries - clem_image_stack.parent_series = series_db_entry - except Exception: - logger.warning( - "Unable to register " - f"CLEM series {sanitise(parent_series)!r} in association with " - f"image stack {sanitise(str(image_stack))!r}: \n" - f"{traceback.format_exc()}" - ) - - # Register updates to entry - db.add(clem_image_stack) - db.commit() - db.close() - return True - - -""" -API ENDPOINTS FOR FILE PROCESSING -""" -@router.post( - 
"/sessions/{session_id}/clem/preprocessing/process_raw_lifs" -) # API posts to this URL +@router.post("/sessions/{session_id}/process_raw_lifs") # API posts to this URL def process_raw_lifs( session_id: int, lif_file: LifInfo, @@ -760,8 +46,8 @@ def process_raw_lifs( raise RuntimeError("The relevant Murfey workflow was not found") # Get instrument name from the database to load the correct config file - session_row: MurfeySession = db.exec( - select(MurfeySession).where(MurfeySession.id == session_id) + session_row: MurfeyDB.Session = db.exec( + select(MurfeyDB.Session).where(MurfeyDB.Session.id == session_id) ).one() instrument_name = session_row.instrument_name @@ -783,7 +69,7 @@ class TIFFSeriesInfo(BaseModel): series_metadata: Path -@router.post("/sessions/{session_id}/clem/preprocessing/process_raw_tiffs") +@router.post("/sessions/{session_id}/process_raw_tiffs") def process_raw_tiffs( session_id: int, tiff_info: TIFFSeriesInfo, @@ -798,8 +84,8 @@ def process_raw_tiffs( raise RuntimeError("The relevant Murfey workflow was not found") # Get instrument name from the database to load the correct config file - session_row: MurfeySession = db.exec( - select(MurfeySession).where(MurfeySession.id == session_id) + session_row: MurfeyDB.Session = db.exec( + select(MurfeyDB.Session).where(MurfeyDB.Session.id == session_id) ).one() instrument_name = session_row.instrument_name @@ -842,7 +128,7 @@ def parse_stringified_list(cls, value): return value -@router.post("/sessions/{session_id}/clem/processing/align_and_merge_stacks") +@router.post("/sessions/{session_id}/align_and_merge_stacks") def align_and_merge_stacks( session_id: int, align_and_merge_params: AlignAndMergeParams, @@ -857,8 +143,8 @@ def align_and_merge_stacks( raise RuntimeError("The relevant Murfey workflow was not found") # Get instrument name from the database to load the correct config file - session_row: MurfeySession = db.exec( - select(MurfeySession).where(MurfeySession.id == session_id) + 
session_row: MurfeyDB.Session = db.exec( + select(MurfeyDB.Session).where(MurfeyDB.Session.id == session_id) ).one() instrument_name = session_row.instrument_name diff --git a/src/murfey/util/route_manifest.yaml b/src/murfey/util/route_manifest.yaml index 48d7aa506..db1ea9fd1 100644 --- a/src/murfey/util/route_manifest.yaml +++ b/src/murfey/util/route_manifest.yaml @@ -383,56 +383,21 @@ murfey.server.api.bootstrap.version: methods: - GET murfey.server.api.clem.router: - - path: /workflow/clem/sessions/{session_id}/clem/lif_files - function: register_lif_file - path_params: - - name: session_id - type: int - methods: - - POST - - path: /workflow/clem/sessions/{session_id}/clem/tiff_files - function: register_tiff_file - path_params: - - name: session_id - type: int - methods: - - POST - - path: /workflow/clem/sessions/{session_id}/clem/metadata_files - function: register_clem_metadata - path_params: - - name: session_id - type: int - methods: - - POST - - path: /workflow/clem/sessions/{session_id}/clem/image_series - function: register_image_series - path_params: - - name: session_id - type: int - methods: - - POST - - path: /workflow/clem/sessions/{session_id}/clem/image_stacks - function: register_image_stack - path_params: - - name: session_id - type: int - methods: - - POST - - path: /workflow/clem/sessions/{session_id}/clem/preprocessing/process_raw_lifs + - path: /workflow/clem/sessions/{session_id}/process_raw_lifs function: process_raw_lifs path_params: - name: session_id type: int methods: - POST - - path: /workflow/clem/sessions/{session_id}/clem/preprocessing/process_raw_tiffs + - path: /workflow/clem/sessions/{session_id}/process_raw_tiffs function: process_raw_tiffs path_params: - name: session_id type: int methods: - POST - - path: /workflow/clem/sessions/{session_id}/clem/processing/align_and_merge_stacks + - path: /workflow/clem/sessions/{session_id}/align_and_merge_stacks function: align_and_merge_stacks path_params: - name: session_id From 
655929e7ca9f4454368ede4655ab31831347b5e9 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 18 Mar 2026 18:07:36 +0000 Subject: [PATCH 02/11] Removed client-side registration of CLEM LIF and TIFF files --- src/murfey/client/contexts/clem.py | 80 +++--------------------------- 1 file changed, 8 insertions(+), 72 deletions(-) diff --git a/src/murfey/client/contexts/clem.py b/src/murfey/client/contexts/clem.py index 0d789156a..ebb6df6d3 100644 --- a/src/murfey/client/contexts/clem.py +++ b/src/murfey/client/contexts/clem.py @@ -5,7 +5,7 @@ import logging from pathlib import Path -from typing import Dict, Generator, List, Optional +from typing import Generator from xml.etree import ElementTree as ET from defusedxml.ElementTree import parse @@ -23,7 +23,7 @@ def _file_transferred_to( source: Path, file_path: Path, rsync_basepath: Path, -) -> Optional[Path]: +): """ Returns the Path of the transferred file on the DLS file system. """ @@ -36,9 +36,7 @@ def _file_transferred_to( return destination -def _get_source( - file_path: Path, environment: MurfeyInstanceEnvironment -) -> Optional[Path]: +def _get_source(file_path: Path, environment: MurfeyInstanceEnvironment): """ Returns the Path of the file on the client PC. """ @@ -48,7 +46,7 @@ def _get_source( return None -def _get_image_elements(root: ET.Element) -> List[ET.Element]: +def _get_image_elements(root: ET.Element) -> list[ET.Element]: """ Searches the XML metadata recursively to find the nodes tagged as "Element" that have image-related tags. 
Some LIF datasets have layers of nested elements, so a @@ -93,14 +91,14 @@ def __init__( self._basepath = basepath self._machine_config = machine_config # CLEM contexts for "auto-save" acquisition mode - self._tiff_series: Dict[str, List[str]] = {} # {Series name : TIFF path list} - self._series_metadata: Dict[str, str] = {} # {Series name : Metadata file path} - self._files_in_series: Dict[str, int] = {} # {Series name : Total TIFFs} + self._tiff_series: dict[str, list[str]] = {} # {Series name : TIFF path list} + self._series_metadata: dict[str, str] = {} # {Series name : Metadata file path} + self._files_in_series: dict[str, int] = {} # {Series name : Total TIFFs} def post_transfer( self, transferred_file: Path, - environment: Optional[MurfeyInstanceEnvironment] = None, + environment: MurfeyInstanceEnvironment | None = None, **kwargs, ) -> bool: super().post_transfer(transferred_file, environment=environment, **kwargs) @@ -180,11 +178,6 @@ def post_transfer( f"File {transferred_file.name!r} added to series {series_name!r}" ) - # Register the TIFF file in the database - post_result = self.register_tiff_file(destination_file, environment) - if post_result is False: - return False - # Process XLIF files if transferred_file.suffix == ".xlif": # Skip processing of "_histo" histogram XLIF files @@ -311,12 +304,6 @@ def post_transfer( ) return False - # Post URL to register LIF file in database - post_result = self.register_lif_file(destination_file, environment) - if post_result is False: - return False - logger.info(f"Registered {destination_file.name!r} in the database") - # Post URL to trigger job and convert LIF file into image stacks post_result = self.process_lif_file(destination_file, environment) if post_result is False: @@ -326,31 +313,6 @@ def post_transfer( # Function has completed as expected return True - def register_lif_file( - self, - lif_file: Path, - environment: MurfeyInstanceEnvironment, - ): - """ - Constructs the URL and dictionary to be posted 
to the server, which will then - register the LIF file in the database correctly as part of the CLEM workflow. - """ - try: - capture_post( - base_url=str(environment.url.geturl()), - router_name="clem.router", - function_name="register_lif_file", - token=self._token, - session_id=environment.murfey_session, - data={"lif_file": str(lif_file)}, - ) - return True - except Exception as e: - logger.error( - f"Error encountered when registering the LIF file in the database: {e}" - ) - return False - def process_lif_file( self, lif_file: Path, @@ -375,32 +337,6 @@ def process_lif_file( logger.error(f"Error encountered processing LIF file: {e}") return False - def register_tiff_file( - self, - tiff_file: Path, - environment: MurfeyInstanceEnvironment, - ): - """ - Constructs the URL and dictionary to be posted to the server, which will then - register the TIFF file in the database correctly as part of the CLEM workflow. - """ - - try: - capture_post( - base_url=str(environment.url.geturl()), - router_name="clem.router", - function_name="register_tiff_file", - token=self._token, - session_id=environment.murfey_session, - data={"tiff_file": str(tiff_file)}, - ) - return True - except Exception as e: - logger.error( - f"Error encountered when registering the TIFF file in the database: {e}" - ) - return False - def process_tiff_series( self, tiff_dataset: dict, From 0f5f4319041205b63c1ecab8a93b73911e764bfb Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 18 Mar 2026 18:08:47 +0000 Subject: [PATCH 03/11] Removed registration of unneeded tables in CLEM workflow --- .../clem/register_preprocessing_results.py | 134 +++++------------- 1 file changed, 32 insertions(+), 102 deletions(-) diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py index aa8d8dda0..dbdb53c1f 100644 --- a/src/murfey/workflows/clem/register_preprocessing_results.py +++ 
b/src/murfey/workflows/clem/register_preprocessing_results.py @@ -9,7 +9,6 @@ import json import logging -import re import traceback from collections.abc import Collection from importlib.metadata import entry_points @@ -25,7 +24,6 @@ from murfey.util.processing_params import ( default_clem_processing_parameters as processing_params, ) -from murfey.workflows.clem import get_db_entry from murfey.workflows.clem.align_and_merge import submit_cluster_request logger = logging.getLogger("murfey.workflows.clem.register_preprocessing_results") @@ -93,91 +91,19 @@ def _register_clem_image_series( result: CLEMPreprocessingResult, murfey_db: Session, ): - clem_img_series: MurfeyDB.CLEMImageSeries = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMImageSeries, - session_id=session_id, - series_name=result.series_name, - ) - clem_metadata: MurfeyDB.CLEMImageMetadata = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMImageMetadata, - session_id=session_id, - file_path=result.metadata, - ) - # Register and link parent LIF file if present - if result.parent_lif is not None: - clem_lif_file: MurfeyDB.CLEMLIFFile = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMLIFFile, - session_id=session_id, - file_path=result.parent_lif, - ) - clem_img_series.parent_lif = clem_lif_file - clem_metadata.parent_lif = clem_lif_file - - # Link and commit series and metadata tables - clem_img_series.associated_metadata = clem_metadata - murfey_db.add_all([clem_img_series, clem_metadata]) - murfey_db.commit() - - # Iteratively register the output image stacks - for c, (channel, output_file) in enumerate(result.output_files.items()): - clem_img_stk: MurfeyDB.CLEMImageStack = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMImageStack, - session_id=session_id, - file_path=output_file, + if not ( + clem_img_series := murfey_db.exec( + select(MurfeyDB.CLEMImageSeries) + .where(MurfeyDB.CLEMImageSeries.session_id == session_id) + .where(MurfeyDB.CLEMImageSeries.series_name == result.series_name) + 
).one_or_none() + ): + clem_img_series = MurfeyDB.CLEMImageSeries( + session_id=session_id, series_name=result.series_name ) - # Link associated metadata - clem_img_stk.associated_metadata = clem_metadata - clem_img_stk.parent_series = clem_img_series - clem_img_stk.channel_name = channel - if result.parent_lif is not None: - clem_img_stk.parent_lif = clem_lif_file - murfey_db.add(clem_img_stk) - murfey_db.commit() - - # Register and link parent TIFF files if present - if result.parent_tiffs: - seed_file = result.parent_tiffs[channel][0] - if c == 0: - # Load list of files to register from seed file - series_identifier = seed_file.stem.split("--")[0] + "--" - tiff_list = list(seed_file.parent.glob(f"{series_identifier}--")) - - # Load TIFF files by colour channel if "--C" in file stem - match = re.search(r"--C[\d]{2,3}", seed_file.stem) - tiff_file_subset = [ - file - for file in tiff_list - if file.stem.startswith(series_identifier) - and (match.group(0) in file.stem if match else True) - ] - tiff_file_subset.sort() - - # Register TIFF file subset - clem_tiff_files = [] - for file in tiff_file_subset: - clem_tiff_file: MurfeyDB.CLEMTIFFFile = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMTIFFFile, - session_id=session_id, - file_path=file, - ) - - # Link associated metadata - clem_tiff_file.associated_metadata = clem_metadata - clem_tiff_file.child_series = clem_img_series - clem_tiff_file.child_stack = clem_img_stk - - clem_tiff_files.append(clem_tiff_file) - - murfey_db.add_all(clem_tiff_files) - murfey_db.commit() - # Add metadata for this series + output_file = list(result.output_files.values())[0] clem_img_series.image_search_string = str(output_file.parent / "*tiff") clem_img_series.data_type = "atlas" if _is_clem_atlas(result) else "grid_square" clem_img_series.number_of_members = result.number_of_members @@ -353,12 +279,17 @@ def _register_dcg_and_atlas( .where(MurfeyDB.DataCollectionGroup.tag == dcg_name) ).one() - clem_img_series: 
MurfeyDB.CLEMImageSeries = get_db_entry( - db=murfey_db, - table=MurfeyDB.CLEMImageSeries, - session_id=session_id, - series_name=result.series_name, - ) + if not ( + clem_img_series := murfey_db.exec( + select(MurfeyDB.CLEMImageSeries) + .where(MurfeyDB.CLEMImageSeries.session_id == session_id) + .where(MurfeyDB.CLEMImageSeries.series_name == result.series_name) + ).one_or_none() + ): + clem_img_series = MurfeyDB.CLEMImageSeries( + session_id=session_id, series_name=result.series_name + ) + clem_img_series.dcg_id = dcg_entry.id clem_img_series.dcg_name = dcg_entry.tag murfey_db.add(clem_img_series) @@ -381,14 +312,14 @@ def _register_grid_square( dcg_name += f"--{result.series_name.split('--')[1]}" # Check if an atlas has been registered - if atlas_search := murfey_db.exec( - select(MurfeyDB.CLEMImageSeries) - .where(MurfeyDB.CLEMImageSeries.session_id == session_id) - .where(MurfeyDB.CLEMImageSeries.dcg_name == dcg_name) - .where(MurfeyDB.CLEMImageSeries.data_type == "atlas") - ).all(): - atlas_entry = atlas_search[0] - else: + if not ( + atlas_entry := murfey_db.exec( + select(MurfeyDB.CLEMImageSeries) + .where(MurfeyDB.CLEMImageSeries.session_id == session_id) + .where(MurfeyDB.CLEMImageSeries.dcg_name == dcg_name) + .where(MurfeyDB.CLEMImageSeries.data_type == "atlas") + ).one_or_none() + ): logger.info( f"No atlas has been registered for data collection group {dcg_name!r} yet" ) @@ -400,7 +331,7 @@ def _register_grid_square( .where(MurfeyDB.CLEMImageSeries.session_id == session_id) .where(MurfeyDB.CLEMImageSeries.dcg_name == dcg_name) .where(MurfeyDB.CLEMImageSeries.data_type == "grid_square") - ): + ).all(): if ( atlas_entry.x0 is not None and atlas_entry.x1 is not None @@ -481,14 +412,13 @@ def _register_grid_square( for murfey_color_flags, ispyb_color_flags in COLOR_FLAGS_MURFEY_TO_ISPYB.items() } # Register or update the grid square entry as required - if grid_square_result := murfey_db.exec( + if grid_square_entry := murfey_db.exec( 
select(MurfeyDB.GridSquare) .where(MurfeyDB.GridSquare.name == clem_img_series.id) .where(MurfeyDB.GridSquare.tag == grid_square_params.tag) .where(MurfeyDB.GridSquare.session_id == session_id) - ).all(): + ).one_or_none(): # Update existing grid square entry on Murfey - grid_square_entry = grid_square_result[0] grid_square_entry.x_location = grid_square_params.x_location grid_square_entry.y_location = grid_square_params.y_location grid_square_entry.x_stage_position = grid_square_params.x_stage_position From 654b47f2538ec49d18671084ec75ee5f01378061 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 18 Mar 2026 18:09:46 +0000 Subject: [PATCH 04/11] No need to use 'get_db_entry' --- .../clem/register_align_and_merge_results.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py index 1cdf4dbb5..fdd8cec98 100644 --- a/src/murfey/workflows/clem/register_align_and_merge_results.py +++ b/src/murfey/workflows/clem/register_align_and_merge_results.py @@ -8,10 +8,9 @@ from typing import Optional from pydantic import BaseModel, field_validator -from sqlmodel import Session +from sqlmodel import Session, select from murfey.util.db import CLEMImageSeries -from murfey.workflows.clem import get_db_entry logger = logging.getLogger("murfey.workflows.clem.register_align_and_merge_results") @@ -83,12 +82,16 @@ def register_align_and_merge_result( try: # Register items in database if not already present try: - clem_img_series: CLEMImageSeries = get_db_entry( - db=murfey_db, - table=CLEMImageSeries, - session_id=session_id, - series_name=result.series_name, - ) + if not ( + clem_img_series := murfey_db.exec( + select(CLEMImageSeries) + .where(CLEMImageSeries.session_id == session_id) + .where(CLEMImageSeries.series_name == result.series_name) + ).one_or_none() + ): + clem_img_series = CLEMImageSeries( + 
session_id=session_id, series_name=result.series_name + ) clem_img_series.composite_created = True murfey_db.add(clem_img_series) murfey_db.commit() From b161a26e241838d0f86023b91daed75ecb93dc2d Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 18 Mar 2026 18:40:38 +0000 Subject: [PATCH 05/11] Removed unneeded CLEM tables, and refactored 'CLEMImageSeries' into 'ImageSites' --- src/murfey/util/db.py | 266 ++---------------- src/murfey/workflows/clem/__init__.py | 24 +- .../clem/register_align_and_merge_results.py | 10 +- .../clem/register_preprocessing_results.py | 142 +++++----- .../test_register_preprocessing_results.py | 8 +- 5 files changed, 107 insertions(+), 343 deletions(-) diff --git a/src/murfey/util/db.py b/src/murfey/util/db.py index 78207ac71..695a0eb0e 100644 --- a/src/murfey/util/db.py +++ b/src/murfey/util/db.py @@ -52,29 +52,8 @@ class Session(SQLModel, table=True): # type: ignore process: bool = Field(default=True) visit_end_time: Optional[datetime] = Field(default=None) - # CLEM Workflow - - # LIF files collected, if any - lif_files: List["CLEMLIFFile"] = Relationship( - back_populates="session", - sa_relationship_kwargs={"cascade": "delete"}, - ) - # TIFF files collected, if any - tiff_files: List["CLEMTIFFFile"] = Relationship( - back_populates="session", - sa_relationship_kwargs={"cascade": "delete"}, - ) - # Metadata files generated - metadata_files: List["CLEMImageMetadata"] = Relationship( - back_populates="session", sa_relationship_kwargs={"cascade": "delete"} - ) - # Image series associated with this session - image_series: List["CLEMImageSeries"] = Relationship( - back_populates="session", - sa_relationship_kwargs={"cascade": "delete"}, - ) - # Image stacks associated with this session - image_stacks: List["CLEMImageStack"] = Relationship( + # Image sites associated with this session + image_sites: List["ImageSite"] = Relationship( back_populates="session", sa_relationship_kwargs={"cascade": "delete"}, ) @@ -107,141 +86,20 @@ class 
Session(SQLModel, table=True): # type: ignore ) -""" -CLEM WORKFLOW -""" - - -class CLEMLIFFile(SQLModel, table=True): # type: ignore - """ - Database recording the different LIF files acquired during the CLEM session, as - well as the different image series stored within them. - """ - - id: Optional[int] = Field(default=None, primary_key=True) - file_path: str = Field(index=True) # Path to LIF file - - # The CLEM session this series belongs to - session: Optional["Session"] = Relationship( - back_populates="lif_files" - ) # Many to one - session_id: Optional[int] = Field( - foreign_key="session.id", - default=None, - ) - - master_metadata: Optional[str] = Field( - index=True - ) # Path to master metadata generated from LIF file - - # Offspring - child_metadata: List["CLEMImageMetadata"] = Relationship( - back_populates="parent_lif", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - child_series: List["CLEMImageSeries"] = Relationship( - back_populates="parent_lif", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - child_stacks: List["CLEMImageStack"] = Relationship( - back_populates="parent_lif", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - - -class CLEMTIFFFile(SQLModel, table=True): # type: ignore +class ImageSite(SQLModel, table=True): # type: ignore """ - Database to record each raw TIFF file acquired during a CLEM session, which are - used to create an image stack + Table for recording unique imaging sites in the session. These can then be linked + to DataCollectionGroup or GridSquare entries as needed. 
""" id: Optional[int] = Field(default=None, primary_key=True) - file_path: str = Field(index=True) # File path to TIFF file on system - - session: Optional["Session"] = Relationship( - back_populates="tiff_files" - ) # Many to one - session_id: Optional[int] = Field( - foreign_key="session.id", - default=None, - ) - - # Metadata associated with this TIFF file - associated_metadata: Optional["CLEMImageMetadata"] = Relationship( - back_populates="associated_tiffs", - ) # Many to one - metadata_id: Optional[int] = Field( - foreign_key="clemimagemetadata.id", - default=None, - ) - - # Image series it contributes to - child_series: Optional["CLEMImageSeries"] = Relationship( - back_populates="parent_tiffs" - ) # Many to one - series_id: Optional[int] = Field( - foreign_key="clemimageseries.id", - default=None, - ) - - # Image stack it contributes to - child_stack: Optional["CLEMImageStack"] = Relationship( - back_populates="parent_tiffs" - ) # Many to one - stack_id: Optional[int] = Field( - foreign_key="clemimagestack.id", - default=None, - ) - - -class CLEMImageMetadata(SQLModel, table=True): # type: ignore - id: Optional[int] = Field(default=None, primary_key=True) - file_path: str = Field(index=True) # Full path to metadata file - - session: Optional["Session"] = Relationship(back_populates="metadata_files") - session_id: Optional[int] = Field(foreign_key="session.id") # Many to one - - # The parent LIF file this metadata originates from, if any - parent_lif: Optional[CLEMLIFFile] = Relationship( - back_populates="child_metadata", - ) # Many to one - parent_lif_id: Optional[int] = Field( - foreign_key="clemliffile.id", - default=None, - ) - # The TIFF files related to this file - associated_tiffs: List["CLEMTIFFFile"] = Relationship( - back_populates="associated_metadata", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - - # Associated series - associated_series: Optional["CLEMImageSeries"] = Relationship( - back_populates="associated_metadata", - 
sa_relationship_kwargs={"cascade": "delete"}, - ) # One to one - - # Associated image stacks - associated_stacks: List["CLEMImageStack"] = Relationship( - back_populates="associated_metadata", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many + site_name: str = Field(index=True) + # File paths to images and thumbnails; can be a glob search string + image_path: Optional[str] = Field(default=None) + thumbnail_path: Optional[str] = Field(default=None) -class CLEMImageSeries(SQLModel, table=True): # type: ignore - """ - Database recording the files and metadata associated with a series, which are to be - processed together as a group. These files could stem from a parent LIF file, or - have been compiled together from individual TIFF files. - """ - - id: Optional[int] = Field(default=None, primary_key=True) - series_name: str = Field( - index=True - ) # Name of the series, as determined from the metadata - image_search_string: Optional[str] = Field(default=None) - thumbnail_search_string: Optional[str] = Field(default=None) - + # Link to Session table session: Optional["Session"] = Relationship( back_populates="image_series" ) # Many to one @@ -254,7 +112,7 @@ class CLEMImageSeries(SQLModel, table=True): # type: ignore # Link to data collection group data_collection_group: Optional["DataCollectionGroup"] = Relationship( - back_populates="clem_image_series" + back_populates="image_sites" ) dcg_id: Optional[int] = Field( foreign_key="datacollectiongroup.dataCollectionGroupId", default=None @@ -262,49 +120,9 @@ class CLEMImageSeries(SQLModel, table=True): # type: ignore dcg_name: Optional[str] = Field(default=None) # Link to grid squares - grid_square: Optional["GridSquare"] = Relationship( - back_populates="clem_image_series" - ) + grid_square: Optional["GridSquare"] = Relationship(back_populates="image_sites") grid_square_id: Optional[int] = Field(foreign_key="gridsquare.id", default=None) - # The parent LIF file this series originates from, if any - 
parent_lif: Optional["CLEMLIFFile"] = Relationship( - back_populates="child_series", - ) # Many to one - parent_lif_id: Optional[int] = Field( - foreign_key="clemliffile.id", - default=None, - ) - - # The parent TIFF files used to build up the image stacks in the series, if any - parent_tiffs: List["CLEMTIFFFile"] = Relationship( - back_populates="child_series", sa_relationship_kwargs={"cascade": "delete"} - ) # One to many - - # Metadata file for this series - associated_metadata: Optional["CLEMImageMetadata"] = Relationship( - back_populates="associated_series", - ) # One to one - metadata_id: Optional[int] = Field( - foreign_key="clemimagemetadata.id", - default=None, - ) - - # Image stack entries that are part of this series - child_stacks: List["CLEMImageStack"] = Relationship( - back_populates="parent_series", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - number_of_members: Optional[int] = Field(default=None) - has_grey: Optional[bool] = Field(default=None) - has_red: Optional[bool] = Field(default=None) - has_green: Optional[bool] = Field(default=None) - has_blue: Optional[bool] = Field(default=None) - has_cyan: Optional[bool] = Field(default=None) - has_magenta: Optional[bool] = Field(default=None) - has_yellow: Optional[bool] = Field(default=None) - collection_mode: Optional[str] = Field(default=None) - # Shape and resolution information image_pixels_x: Optional[int] = Field(default=None) image_pixels_y: Optional[int] = Field(default=None) @@ -320,55 +138,19 @@ class CLEMImageSeries(SQLModel, table=True): # type: ignore y0: Optional[float] = Field(default=None) y1: Optional[float] = Field(default=None) - # Composite images + # Colour channel-related fields + number_of_members: Optional[int] = Field(default=None) + has_grey: Optional[bool] = Field(default=None) + has_red: Optional[bool] = Field(default=None) + has_green: Optional[bool] = Field(default=None) + has_blue: Optional[bool] = Field(default=None) + has_cyan: Optional[bool] = 
Field(default=None) + has_magenta: Optional[bool] = Field(default=None) + has_yellow: Optional[bool] = Field(default=None) + collection_mode: Optional[str] = Field(default=None) composite_created: bool = False # Has a composite image been created? -class CLEMImageStack(SQLModel, table=True): # type: ignore - """ - Database to keep track of the processing status of a single image stack. - """ - - id: Optional[int] = Field(default=None, primary_key=True) - file_path: str = Field(index=True) # Full path to the file - channel_name: Optional[str] = None # Color associated with stack - - session: Optional["Session"] = Relationship( - back_populates="image_stacks" - ) # Many to one - session_id: Optional[int] = Field(foreign_key="session.id") - - # LIF file this stack originated from - parent_lif: Optional["CLEMLIFFile"] = Relationship( - back_populates="child_stacks", - ) # Many to one - parent_lif_id: Optional[int] = Field(foreign_key="clemliffile.id", default=None) - - # TIFF files used to build this stack - parent_tiffs: List["CLEMTIFFFile"] = Relationship( - back_populates="child_stack", - sa_relationship_kwargs={"cascade": "delete"}, - ) # One to many - - # Metadata associated with statck - associated_metadata: Optional["CLEMImageMetadata"] = Relationship( - back_populates="associated_stacks", - ) # Many to one - metadata_id: Optional[int] = Field( - foreign_key="clemimagemetadata.id", - default=None, - ) - - # Image series this image stack belongs to - parent_series: Optional["CLEMImageSeries"] = Relationship( - back_populates="child_stacks", - ) # Many to one - series_id: Optional[int] = Field( - foreign_key="clemimageseries.id", - default=None, - ) - - """ TEM SESSION AND PROCESSING WORKFLOW """ @@ -434,7 +216,7 @@ class DataCollectionGroup(SQLModel, table=True): # type: ignore back_populates="data_collection_group", sa_relationship_kwargs={"cascade": "delete"}, ) - clem_image_series: List["CLEMImageSeries"] = Relationship( + image_sites: List["ImageSite"] = 
Relationship( back_populates="data_collection_group", sa_relationship_kwargs={"cascade": "delete"}, ) @@ -702,7 +484,7 @@ class GridSquare(SQLModel, table=True): # type: ignore pixel_size: Optional[float] = None image: str = "" session: Optional[Session] = Relationship(back_populates="grid_squares") - clem_image_series: List["CLEMImageSeries"] = Relationship( + image_sites: List["ImageSite"] = Relationship( back_populates="grid_square", sa_relationship_kwargs={"cascade": "delete"} ) foil_holes: List["FoilHole"] = Relationship( diff --git a/src/murfey/workflows/clem/__init__.py b/src/murfey/workflows/clem/__init__.py index 4aedd01c0..ffb0d82d5 100644 --- a/src/murfey/workflows/clem/__init__.py +++ b/src/murfey/workflows/clem/__init__.py @@ -10,11 +10,7 @@ from murfey.util.config import get_machine_config from murfey.util.db import ( - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, + ImageSite, Session as MurfeySession, ) @@ -92,25 +88,11 @@ def get_db_entry( # With the database search funcion having been moved out of the FastAPI # endpoint, the database now has to be explicitly passed within the FastAPI # endpoint function in order for it to be loaded in the correct state. - table: Type[ - Union[ - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, - ] - ], + table: Type[Union[ImageSite,]], session_id: int, file_path: Optional[Path] = None, series_name: Optional[str] = None, -) -> Union[ - CLEMImageMetadata, - CLEMImageSeries, - CLEMImageStack, - CLEMLIFFile, - CLEMTIFFFile, -]: +) -> Union[ImageSite,]: """ Searches the CLEM workflow-related tables in the Murfey database for an entry that matches the file path or series name within a given session. 
Returns the entry if diff --git a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py index fdd8cec98..17fb0a368 100644 --- a/src/murfey/workflows/clem/register_align_and_merge_results.py +++ b/src/murfey/workflows/clem/register_align_and_merge_results.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, field_validator from sqlmodel import Session, select -from murfey.util.db import CLEMImageSeries +from murfey.util.db import ImageSite logger = logging.getLogger("murfey.workflows.clem.register_align_and_merge_results") @@ -84,12 +84,12 @@ def register_align_and_merge_result( try: if not ( clem_img_series := murfey_db.exec( - select(CLEMImageSeries) - .where(CLEMImageSeries.session_id == session_id) - .where(CLEMImageSeries.series_name == result.series_name) + select(ImageSite) + .where(ImageSite.session_id == session_id) + .where(ImageSite.series_name == result.series_name) ).one_or_none() ): - clem_img_series = CLEMImageSeries( + clem_img_series = ImageSite( session_id=session_id, series_name=result.series_name ) clem_img_series.composite_created = True diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py index dbdb53c1f..b2450cec9 100644 --- a/src/murfey/workflows/clem/register_preprocessing_results.py +++ b/src/murfey/workflows/clem/register_preprocessing_results.py @@ -86,53 +86,53 @@ def _get_color_flags( return color_flags -def _register_clem_image_series( +def _register_clem_image_site( session_id: int, result: CLEMPreprocessingResult, murfey_db: Session, ): if not ( - clem_img_series := murfey_db.exec( - select(MurfeyDB.CLEMImageSeries) - .where(MurfeyDB.CLEMImageSeries.session_id == session_id) - .where(MurfeyDB.CLEMImageSeries.series_name == result.series_name) + clem_img_site := murfey_db.exec( + select(MurfeyDB.ImageSite) + .where(MurfeyDB.ImageSite.session_id == session_id) + 
.where(MurfeyDB.ImageSite.site_name == result.series_name) ).one_or_none() ): - clem_img_series = MurfeyDB.CLEMImageSeries( + clem_img_site = MurfeyDB.ImageSite( session_id=session_id, series_name=result.series_name ) # Add metadata for this series output_file = list(result.output_files.values())[0] - clem_img_series.image_search_string = str(output_file.parent / "*tiff") - clem_img_series.data_type = "atlas" if _is_clem_atlas(result) else "grid_square" - clem_img_series.number_of_members = result.number_of_members + clem_img_site.image_path = str(output_file.parent / "*tiff") + clem_img_site.data_type = "atlas" if _is_clem_atlas(result) else "grid_square" + clem_img_site.number_of_members = result.number_of_members for col_name, value in _get_color_flags(result.output_files.keys()).items(): - setattr(clem_img_series, col_name, value) - clem_img_series.collection_mode = _determine_collection_mode( + setattr(clem_img_site, col_name, value) + clem_img_site.collection_mode = _determine_collection_mode( result.output_files.keys() ) - clem_img_series.image_pixels_x = result.pixels_x - clem_img_series.image_pixels_y = result.pixels_y - clem_img_series.image_pixel_size = result.pixel_size - clem_img_series.units = result.units - clem_img_series.x0 = result.extent[0] - clem_img_series.x1 = result.extent[1] - clem_img_series.y0 = result.extent[2] - clem_img_series.y1 = result.extent[3] + clem_img_site.image_pixels_x = result.pixels_x + clem_img_site.image_pixels_y = result.pixels_y + clem_img_site.image_pixel_size = result.pixel_size + clem_img_site.units = result.units + clem_img_site.x0 = result.extent[0] + clem_img_site.x1 = result.extent[1] + clem_img_site.y0 = result.extent[2] + clem_img_site.y1 = result.extent[3] # Register thumbnails if they are present if result.thumbnails and result.thumbnail_size: thumbnail = list(result.thumbnails.values())[0] - clem_img_series.thumbnail_search_string = str(thumbnail.parent / "*.png") + clem_img_site.thumbnail_path = 
str(thumbnail.parent / "*.png") thumbnail_height, thumbnail_width = result.thumbnail_size scaling_factor = min( thumbnail_height / result.pixels_y, thumbnail_width / result.pixels_x ) - clem_img_series.thumbnail_pixel_size = result.pixel_size / scaling_factor - clem_img_series.thumbnail_pixels_x = int(result.pixels_x * scaling_factor) - clem_img_series.thumbnail_pixels_y = int(result.pixels_y * scaling_factor) - murfey_db.add(clem_img_series) + clem_img_site.thumbnail_pixel_size = result.pixel_size / scaling_factor + clem_img_site.thumbnail_pixels_x = int(result.pixels_x * scaling_factor) + clem_img_site.thumbnail_pixels_y = int(result.pixels_y * scaling_factor) + murfey_db.add(clem_img_site) murfey_db.commit() murfey_db.close() @@ -280,19 +280,19 @@ def _register_dcg_and_atlas( ).one() if not ( - clem_img_series := murfey_db.exec( - select(MurfeyDB.CLEMImageSeries) - .where(MurfeyDB.CLEMImageSeries.session_id == session_id) - .where(MurfeyDB.CLEMImageSeries.series_name == result.series_name) + clem_img_site := murfey_db.exec( + select(MurfeyDB.ImageSite) + .where(MurfeyDB.ImageSite.session_id == session_id) + .where(MurfeyDB.ImageSite.site_name == result.series_name) ).one_or_none() ): - clem_img_series = MurfeyDB.CLEMImageSeries( + clem_img_site = MurfeyDB.ImageSite( session_id=session_id, series_name=result.series_name ) - clem_img_series.dcg_id = dcg_entry.id - clem_img_series.dcg_name = dcg_entry.tag - murfey_db.add(clem_img_series) + clem_img_site.dcg_id = dcg_entry.id + clem_img_site.dcg_name = dcg_entry.tag + murfey_db.add(clem_img_site) murfey_db.commit() murfey_db.close() @@ -314,10 +314,10 @@ def _register_grid_square( # Check if an atlas has been registered if not ( atlas_entry := murfey_db.exec( - select(MurfeyDB.CLEMImageSeries) - .where(MurfeyDB.CLEMImageSeries.session_id == session_id) - .where(MurfeyDB.CLEMImageSeries.dcg_name == dcg_name) - .where(MurfeyDB.CLEMImageSeries.data_type == "atlas") + select(MurfeyDB.ImageSite) + 
.where(MurfeyDB.ImageSite.session_id == session_id) + .where(MurfeyDB.ImageSite.dcg_name == dcg_name) + .where(MurfeyDB.ImageSite.data_type == "atlas") ).one_or_none() ): logger.info( @@ -326,11 +326,11 @@ def _register_grid_square( return # Check if there are CLEM entries to register - if clem_img_series_to_register := murfey_db.exec( - select(MurfeyDB.CLEMImageSeries) - .where(MurfeyDB.CLEMImageSeries.session_id == session_id) - .where(MurfeyDB.CLEMImageSeries.dcg_name == dcg_name) - .where(MurfeyDB.CLEMImageSeries.data_type == "grid_square") + if clem_img_site_to_register := murfey_db.exec( + select(MurfeyDB.ImageSite) + .where(MurfeyDB.ImageSite.session_id == session_id) + .where(MurfeyDB.ImageSite.dcg_name == dcg_name) + .where(MurfeyDB.ImageSite.data_type == "grid_square") ).all(): if ( atlas_entry.x0 is not None @@ -346,23 +346,23 @@ def _register_grid_square( logger.warning("Atlas entry not populated with required values") return - for clem_img_series in clem_img_series_to_register: + for clem_img_site in clem_img_site_to_register: # Register datasets using thumbnail sizes and scales if ( - clem_img_series.x0 is not None - and clem_img_series.x1 is not None - and clem_img_series.y0 is not None - and clem_img_series.y1 is not None + clem_img_site.x0 is not None + and clem_img_site.x1 is not None + and clem_img_site.y0 is not None + and clem_img_site.y1 is not None ): # Find pixel corresponding to image midpoint on atlas x_mid_real = ( - 0.5 * (clem_img_series.x0 + clem_img_series.x1) - atlas_entry.x0 + 0.5 * (clem_img_site.x0 + clem_img_site.x1) - atlas_entry.x0 ) x_mid_px = int( x_mid_real / atlas_width_real * atlas_entry.thumbnail_pixels_x ) y_mid_real = ( - 0.5 * (clem_img_series.y0 + clem_img_series.y1) - atlas_entry.y0 + 0.5 * (clem_img_site.y0 + clem_img_site.y1) - atlas_entry.y0 ) y_mid_px = int( y_mid_real / atlas_height_real * atlas_entry.thumbnail_pixels_y @@ -370,51 +370,51 @@ def _register_grid_square( # Find the size of the image, in pixels, 
when overlaid on the atlas width_scaled = int( - (clem_img_series.x1 - clem_img_series.x0) + (clem_img_site.x1 - clem_img_site.x0) / atlas_width_real * atlas_entry.thumbnail_pixels_x ) height_scaled = int( - (clem_img_series.y1 - clem_img_series.y0) + (clem_img_site.y1 - clem_img_site.y0) / atlas_height_real * atlas_entry.thumbnail_pixels_y ) else: logger.warning( - f"Image series {clem_img_series.series_name!r} not populated with required values" + f"Image series {clem_img_site.site_name!r} not populated with required values" ) continue # Populate grid square Pydantic model grid_square_params = GridSquareParameters( tag=dcg_name, - x_location=clem_img_series.x0, + x_location=clem_img_site.x0, x_location_scaled=x_mid_px, - y_location=clem_img_series.y0, + y_location=clem_img_site.y0, y_location_scaled=y_mid_px, - readout_area_x=clem_img_series.image_pixels_x, - readout_area_y=clem_img_series.image_pixels_y, - thumbnail_size_x=clem_img_series.thumbnail_pixels_x, - thumbnail_size_y=clem_img_series.thumbnail_pixels_y, - width=clem_img_series.image_pixels_x, + readout_area_x=clem_img_site.image_pixels_x, + readout_area_y=clem_img_site.image_pixels_y, + thumbnail_size_x=clem_img_site.thumbnail_pixels_x, + thumbnail_size_y=clem_img_site.thumbnail_pixels_y, + width=clem_img_site.image_pixels_x, width_scaled=width_scaled, - height=clem_img_series.image_pixels_y, + height=clem_img_site.image_pixels_y, height_scaled=height_scaled, - x_stage_position=0.5 * (clem_img_series.x0 + clem_img_series.x1), - y_stage_position=0.5 * (clem_img_series.y0 + clem_img_series.y1), - pixel_size=clem_img_series.image_pixel_size, - image=clem_img_series.thumbnail_search_string, - collection_mode=clem_img_series.collection_mode, + x_stage_position=0.5 * (clem_img_site.x0 + clem_img_site.x1), + y_stage_position=0.5 * (clem_img_site.y0 + clem_img_site.y1), + pixel_size=clem_img_site.image_pixel_size, + image=clem_img_site.thumbnail_path, + collection_mode=clem_img_site.collection_mode, ) # 
Construct colour flags for ISPyB color_flags = { - ispyb_color_flags: int(getattr(clem_img_series, murfey_color_flags, 0)) + ispyb_color_flags: int(getattr(clem_img_site, murfey_color_flags, 0)) for murfey_color_flags, ispyb_color_flags in COLOR_FLAGS_MURFEY_TO_ISPYB.items() } # Register or update the grid square entry as required if grid_square_entry := murfey_db.exec( select(MurfeyDB.GridSquare) - .where(MurfeyDB.GridSquare.name == clem_img_series.id) + .where(MurfeyDB.GridSquare.name == clem_img_site.id) .where(MurfeyDB.GridSquare.tag == grid_square_params.tag) .where(MurfeyDB.GridSquare.session_id == session_id) ).one_or_none(): @@ -446,14 +446,14 @@ def _register_grid_square( # Register to ISPyB grid_square_ispyb_result = _transport_object.do_insert_grid_square( atlas_id=dcg_entry.atlas_id, - grid_square_id=clem_img_series.id, + grid_square_id=clem_img_site.id, grid_square_parameters=grid_square_params, color_flags=color_flags, ) # Register to Murfey grid_square_entry = MurfeyDB.GridSquare( id=grid_square_ispyb_result.get("return_value", None), - name=clem_img_series.id, + name=clem_img_site.id, session_id=session_id, tag=grid_square_params.tag, x_location=grid_square_params.x_location, @@ -471,8 +471,8 @@ def _register_grid_square( murfey_db.commit() # Add grid square ID to existing CLEM image series entry - clem_img_series.grid_square_id = grid_square_entry.id - murfey_db.add(clem_img_series) + clem_img_site.grid_square_id = grid_square_entry.id + murfey_db.add(clem_img_site) murfey_db.commit() else: logger.info( @@ -521,7 +521,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]: return {"success": False, "requeue": False} try: # Register items in Murfey database - _register_clem_image_series( + _register_clem_image_site( session_id=session_id, result=result, murfey_db=murfey_db, diff --git a/tests/workflows/clem/test_register_preprocessing_results.py b/tests/workflows/clem/test_register_preprocessing_results.py index b701a6b6d..66f35814c 
100644 --- a/tests/workflows/clem/test_register_preprocessing_results.py +++ b/tests/workflows/clem/test_register_preprocessing_results.py @@ -15,7 +15,7 @@ COLOR_FLAGS_MURFEY_TO_ISPYB, _determine_collection_mode, _get_color_flags, - _register_clem_image_series, + _register_clem_image_site, _register_dcg_and_atlas, _register_grid_square, _snake_to_camel_case, @@ -173,8 +173,8 @@ def test_get_color_flags(test_params: tuple[list[str], dict[str, bool]]): assert _get_color_flags(colors) == expected_result -def test_register_clem_image_series(): - _register_clem_image_series +def test_register_clem_image_site(): + _register_clem_image_site @pytest.mark.parametrize( @@ -243,7 +243,7 @@ def test_run( # Mock the registration helper functions mock_register_clem_series = mocker.patch( - "murfey.workflows.clem.register_preprocessing_results._register_clem_image_series" + "murfey.workflows.clem.register_preprocessing_results._register_clem_image_site" ) mock_register_dcg_and_atlas = mocker.patch( "murfey.workflows.clem.register_preprocessing_results._register_dcg_and_atlas" From dc0e96bbd569aefd6caa00f55abc31c0b8bfc4b7 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 08:53:52 +0000 Subject: [PATCH 06/11] Use 'ImagingSite' instead of 'ImageSite' --- src/murfey/util/db.py | 14 ++++---- src/murfey/workflows/clem/__init__.py | 6 ++-- .../clem/register_align_and_merge_results.py | 10 +++--- .../clem/register_preprocessing_results.py | 36 +++++++++---------- .../test_register_preprocessing_results.py | 8 ++--- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/murfey/util/db.py b/src/murfey/util/db.py index 695a0eb0e..8f19d0427 100644 --- a/src/murfey/util/db.py +++ b/src/murfey/util/db.py @@ -53,7 +53,7 @@ class Session(SQLModel, table=True): # type: ignore visit_end_time: Optional[datetime] = Field(default=None) # Image sites associated with this session - image_sites: List["ImageSite"] = Relationship( + imaging_sites: List["ImagingSite"] = 
Relationship( back_populates="session", sa_relationship_kwargs={"cascade": "delete"}, ) @@ -86,7 +86,7 @@ class Session(SQLModel, table=True): # type: ignore ) -class ImageSite(SQLModel, table=True): # type: ignore +class ImagingSite(SQLModel, table=True): # type: ignore """ Table for recording unique imaging sites in the session. These can then be linked to DataCollectionGroup or GridSquare entries as needed. @@ -101,7 +101,7 @@ class ImageSite(SQLModel, table=True): # type: ignore # Link to Session table session: Optional["Session"] = Relationship( - back_populates="image_series" + back_populates="imaging_sites" ) # Many to one session_id: Optional[int] = Field( foreign_key="session.id", default=None, unique=False @@ -112,7 +112,7 @@ class ImageSite(SQLModel, table=True): # type: ignore # Link to data collection group data_collection_group: Optional["DataCollectionGroup"] = Relationship( - back_populates="image_sites" + back_populates="imaging_sites" ) dcg_id: Optional[int] = Field( foreign_key="datacollectiongroup.dataCollectionGroupId", default=None @@ -120,7 +120,7 @@ class ImageSite(SQLModel, table=True): # type: ignore dcg_name: Optional[str] = Field(default=None) # Link to grid squares - grid_square: Optional["GridSquare"] = Relationship(back_populates="image_sites") + grid_square: Optional["GridSquare"] = Relationship(back_populates="imaging_sites") grid_square_id: Optional[int] = Field(foreign_key="gridsquare.id", default=None) # Shape and resolution information @@ -216,7 +216,7 @@ class DataCollectionGroup(SQLModel, table=True): # type: ignore back_populates="data_collection_group", sa_relationship_kwargs={"cascade": "delete"}, ) - image_sites: List["ImageSite"] = Relationship( + imaging_sites: List["ImagingSite"] = Relationship( back_populates="data_collection_group", sa_relationship_kwargs={"cascade": "delete"}, ) @@ -484,7 +484,7 @@ class GridSquare(SQLModel, table=True): # type: ignore pixel_size: Optional[float] = None image: str = "" session: 
Optional[Session] = Relationship(back_populates="grid_squares") - image_sites: List["ImageSite"] = Relationship( + imaging_sites: List["ImagingSite"] = Relationship( back_populates="grid_square", sa_relationship_kwargs={"cascade": "delete"} ) foil_holes: List["FoilHole"] = Relationship( diff --git a/src/murfey/workflows/clem/__init__.py b/src/murfey/workflows/clem/__init__.py index ffb0d82d5..d06f6fd85 100644 --- a/src/murfey/workflows/clem/__init__.py +++ b/src/murfey/workflows/clem/__init__.py @@ -10,7 +10,7 @@ from murfey.util.config import get_machine_config from murfey.util.db import ( - ImageSite, + ImagingSite, Session as MurfeySession, ) @@ -88,11 +88,11 @@ def get_db_entry( # With the database search funcion having been moved out of the FastAPI # endpoint, the database now has to be explicitly passed within the FastAPI # endpoint function in order for it to be loaded in the correct state. - table: Type[Union[ImageSite,]], + table: Type[Union[ImagingSite,]], session_id: int, file_path: Optional[Path] = None, series_name: Optional[str] = None, -) -> Union[ImageSite,]: +) -> Union[ImagingSite,]: """ Searches the CLEM workflow-related tables in the Murfey database for an entry that matches the file path or series name within a given session. 
Returns the entry if diff --git a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py index 17fb0a368..a29d0175f 100644 --- a/src/murfey/workflows/clem/register_align_and_merge_results.py +++ b/src/murfey/workflows/clem/register_align_and_merge_results.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, field_validator from sqlmodel import Session, select -from murfey.util.db import ImageSite +from murfey.util.db import ImagingSite logger = logging.getLogger("murfey.workflows.clem.register_align_and_merge_results") @@ -84,12 +84,12 @@ def register_align_and_merge_result( try: if not ( clem_img_series := murfey_db.exec( - select(ImageSite) - .where(ImageSite.session_id == session_id) - .where(ImageSite.series_name == result.series_name) + select(ImagingSite) + .where(ImagingSite.session_id == session_id) + .where(ImagingSite.series_name == result.series_name) ).one_or_none() ): - clem_img_series = ImageSite( + clem_img_series = ImagingSite( session_id=session_id, series_name=result.series_name ) clem_img_series.composite_created = True diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py index b2450cec9..6aa9163d9 100644 --- a/src/murfey/workflows/clem/register_preprocessing_results.py +++ b/src/murfey/workflows/clem/register_preprocessing_results.py @@ -86,19 +86,19 @@ def _get_color_flags( return color_flags -def _register_clem_image_site( +def _register_clem_imaging_site( session_id: int, result: CLEMPreprocessingResult, murfey_db: Session, ): if not ( clem_img_site := murfey_db.exec( - select(MurfeyDB.ImageSite) - .where(MurfeyDB.ImageSite.session_id == session_id) - .where(MurfeyDB.ImageSite.site_name == result.series_name) + select(MurfeyDB.ImagingSite) + .where(MurfeyDB.ImagingSite.session_id == session_id) + .where(MurfeyDB.ImagingSite.site_name == result.series_name) ).one_or_none() ): - clem_img_site = 
MurfeyDB.ImageSite( + clem_img_site = MurfeyDB.ImagingSite( session_id=session_id, series_name=result.series_name ) @@ -281,12 +281,12 @@ def _register_dcg_and_atlas( if not ( clem_img_site := murfey_db.exec( - select(MurfeyDB.ImageSite) - .where(MurfeyDB.ImageSite.session_id == session_id) - .where(MurfeyDB.ImageSite.site_name == result.series_name) + select(MurfeyDB.ImagingSite) + .where(MurfeyDB.ImagingSite.session_id == session_id) + .where(MurfeyDB.ImagingSite.site_name == result.series_name) ).one_or_none() ): - clem_img_site = MurfeyDB.ImageSite( + clem_img_site = MurfeyDB.ImagingSite( session_id=session_id, series_name=result.series_name ) @@ -314,10 +314,10 @@ def _register_grid_square( # Check if an atlas has been registered if not ( atlas_entry := murfey_db.exec( - select(MurfeyDB.ImageSite) - .where(MurfeyDB.ImageSite.session_id == session_id) - .where(MurfeyDB.ImageSite.dcg_name == dcg_name) - .where(MurfeyDB.ImageSite.data_type == "atlas") + select(MurfeyDB.ImagingSite) + .where(MurfeyDB.ImagingSite.session_id == session_id) + .where(MurfeyDB.ImagingSite.dcg_name == dcg_name) + .where(MurfeyDB.ImagingSite.data_type == "atlas") ).one_or_none() ): logger.info( @@ -327,10 +327,10 @@ def _register_grid_square( # Check if there are CLEM entries to register if clem_img_site_to_register := murfey_db.exec( - select(MurfeyDB.ImageSite) - .where(MurfeyDB.ImageSite.session_id == session_id) - .where(MurfeyDB.ImageSite.dcg_name == dcg_name) - .where(MurfeyDB.ImageSite.data_type == "grid_square") + select(MurfeyDB.ImagingSite) + .where(MurfeyDB.ImagingSite.session_id == session_id) + .where(MurfeyDB.ImagingSite.dcg_name == dcg_name) + .where(MurfeyDB.ImagingSite.data_type == "grid_square") ).all(): if ( atlas_entry.x0 is not None @@ -521,7 +521,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]: return {"success": False, "requeue": False} try: # Register items in Murfey database - _register_clem_image_site( + _register_clem_imaging_site( 
session_id=session_id, result=result, murfey_db=murfey_db, diff --git a/tests/workflows/clem/test_register_preprocessing_results.py b/tests/workflows/clem/test_register_preprocessing_results.py index 66f35814c..b1fc1c0d9 100644 --- a/tests/workflows/clem/test_register_preprocessing_results.py +++ b/tests/workflows/clem/test_register_preprocessing_results.py @@ -15,7 +15,7 @@ COLOR_FLAGS_MURFEY_TO_ISPYB, _determine_collection_mode, _get_color_flags, - _register_clem_image_site, + _register_clem_imaging_site, _register_dcg_and_atlas, _register_grid_square, _snake_to_camel_case, @@ -173,8 +173,8 @@ def test_get_color_flags(test_params: tuple[list[str], dict[str, bool]]): assert _get_color_flags(colors) == expected_result -def test_register_clem_image_site(): - _register_clem_image_site +def test_register_clem_imaging_site(): + _register_clem_imaging_site @pytest.mark.parametrize( @@ -243,7 +243,7 @@ def test_run( # Mock the registration helper functions mock_register_clem_series = mocker.patch( - "murfey.workflows.clem.register_preprocessing_results._register_clem_image_site" + "murfey.workflows.clem.register_preprocessing_results._register_clem_imaging_site" ) mock_register_dcg_and_atlas = mocker.patch( "murfey.workflows.clem.register_preprocessing_results._register_dcg_and_atlas" From b636aacf1c6a98b1e0210d9813c247d1fafbdba2 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 09:03:29 +0000 Subject: [PATCH 07/11] Missed renaming fields from 'series_name' to 'site_name' --- src/murfey/workflows/clem/register_preprocessing_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py index 6aa9163d9..6c4d97fdc 100644 --- a/src/murfey/workflows/clem/register_preprocessing_results.py +++ b/src/murfey/workflows/clem/register_preprocessing_results.py @@ -99,7 +99,7 @@ def _register_clem_imaging_site( ).one_or_none() ): 
clem_img_site = MurfeyDB.ImagingSite( - session_id=session_id, series_name=result.series_name + session_id=session_id, site_name=result.series_name ) # Add metadata for this series @@ -287,7 +287,7 @@ def _register_dcg_and_atlas( ).one_or_none() ): clem_img_site = MurfeyDB.ImagingSite( - session_id=session_id, series_name=result.series_name + session_id=session_id, site_name=result.series_name ) clem_img_site.dcg_id = dcg_entry.id From 9fdca4879ba43bf47bf4b526f84d3b21189560ac Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 09:42:55 +0000 Subject: [PATCH 08/11] Removed legacy function names for CLEM workflows, replacing them with 'run' instead --- pyproject.toml | 8 ++++---- src/murfey/workflows/clem/align_and_merge.py | 2 +- src/murfey/workflows/clem/process_raw_lifs.py | 2 +- src/murfey/workflows/clem/process_raw_tiffs.py | 2 +- .../workflows/clem/register_align_and_merge_results.py | 4 +--- .../workflows/clem/register_preprocessing_results.py | 4 ++-- tests/workflows/clem/test_align_and_merge.py | 6 +++--- tests/workflows/clem/test_process_raw_lifs.py | 6 +++--- tests/workflows/clem/test_process_raw_tiffs.py | 6 +++--- .../workflows/clem/test_register_preprocessing_results.py | 4 ++-- 10 files changed, 21 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f76fe081..03b8484e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,10 +98,10 @@ GitHub = "https://github.com/DiamondLightSource/python-murfey" "murfey_machine" = "murfey.util.config:get_extended_machine_config" [project.entry-points."murfey.workflows"] "atlas_update" = "murfey.workflows.register_atlas_update:run" -"clem.align_and_merge" = "murfey.workflows.clem.align_and_merge:submit_cluster_request" -"clem.process_raw_lifs" = "murfey.workflows.clem.process_raw_lifs:zocalo_cluster_request" -"clem.process_raw_tiffs" = "murfey.workflows.clem.process_raw_tiffs:zocalo_cluster_request" -"clem.register_align_and_merge_result" = 
"murfey.workflows.clem.register_align_and_merge_results:register_align_and_merge_result" +"clem.align_and_merge" = "murfey.workflows.clem.align_and_merge:run" +"clem.process_raw_lifs" = "murfey.workflows.clem.process_raw_lifs:run" +"clem.process_raw_tiffs" = "murfey.workflows.clem.process_raw_tiffs:run" +"clem.register_align_and_merge_result" = "murfey.workflows.clem.register_align_and_merge_results:run" "clem.register_preprocessing_result" = "murfey.workflows.clem.register_preprocessing_results:run" "data_collection" = "murfey.workflows.register_data_collection:run" "data_collection_group" = "murfey.workflows.register_data_collection_group:run" diff --git a/src/murfey/workflows/clem/align_and_merge.py b/src/murfey/workflows/clem/align_and_merge.py index 2bc241ae0..1b109bbc4 100644 --- a/src/murfey/workflows/clem/align_and_merge.py +++ b/src/murfey/workflows/clem/align_and_merge.py @@ -16,7 +16,7 @@ pass # Ignore if ISPyB credentials environment variable not set -def submit_cluster_request( +def run( # Session parameters session_id: int, instrument_name: str, diff --git a/src/murfey/workflows/clem/process_raw_lifs.py b/src/murfey/workflows/clem/process_raw_lifs.py index 9d2dd4937..320624cbb 100644 --- a/src/murfey/workflows/clem/process_raw_lifs.py +++ b/src/murfey/workflows/clem/process_raw_lifs.py @@ -15,7 +15,7 @@ logger = getLogger("murfey.workflows.clem.process_raw_lifs") -def zocalo_cluster_request( +def run( file: Path, root_folder: str, session_id: int, # Provided by the client via the API endpoint diff --git a/src/murfey/workflows/clem/process_raw_tiffs.py b/src/murfey/workflows/clem/process_raw_tiffs.py index e8f84a966..07ea8eb81 100644 --- a/src/murfey/workflows/clem/process_raw_tiffs.py +++ b/src/murfey/workflows/clem/process_raw_tiffs.py @@ -15,7 +15,7 @@ pass # Ignore if ISPyB credentials environment variable not set -def zocalo_cluster_request( +def run( tiff_list: list[Path], root_folder: str, session_id: int, diff --git 
a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py index a29d0175f..bd39f5cb3 100644 --- a/src/murfey/workflows/clem/register_align_and_merge_results.py +++ b/src/murfey/workflows/clem/register_align_and_merge_results.py @@ -40,9 +40,7 @@ def parse_stringified_list(cls, value): return value -def register_align_and_merge_result( - message: dict, murfey_db: Session -) -> dict[str, bool]: +def run(message: dict, murfey_db: Session) -> dict[str, bool]: """ session_id (recipe) register (wrapper) diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py index 6c4d97fdc..cee1c6008 100644 --- a/src/murfey/workflows/clem/register_preprocessing_results.py +++ b/src/murfey/workflows/clem/register_preprocessing_results.py @@ -24,7 +24,7 @@ from murfey.util.processing_params import ( default_clem_processing_parameters as processing_params, ) -from murfey.workflows.clem.align_and_merge import submit_cluster_request +from murfey.workflows.clem.align_and_merge import run as run_align_and_merge logger = logging.getLogger("murfey.workflows.clem.register_preprocessing_results") @@ -589,7 +589,7 @@ def run(message: dict, murfey_db: Session) -> dict[str, bool]: # Request for image alignment and processing for the requested combinations for image_combo in image_combos_to_process: try: - submit_cluster_request( + run_align_and_merge( session_id=session_id, instrument_name=murfey_session.instrument_name, series_name=result.series_name, diff --git a/tests/workflows/clem/test_align_and_merge.py b/tests/workflows/clem/test_align_and_merge.py index b7d5d8ec3..d5a7a740c 100644 --- a/tests/workflows/clem/test_align_and_merge.py +++ b/tests/workflows/clem/test_align_and_merge.py @@ -6,7 +6,7 @@ from murfey.server.ispyb import TransportManager from murfey.util.config import MachineConfig -from murfey.workflows.clem.align_and_merge import 
submit_cluster_request +from murfey.workflows.clem.align_and_merge import run # Folder and file settings session_id = 0 @@ -63,7 +63,7 @@ def metadata(processed_dir: Path): @patch("murfey.workflows.clem.align_and_merge.get_machine_config") -def test_submit_cluster_request( +def test_run( mock_get_machine_config, image_stacks: list[Path], metadata: Path, @@ -86,7 +86,7 @@ def test_submit_cluster_request( } # Run the function - submit_cluster_request( + run( session_id=session_id, instrument_name=instrument_name, series_name=series_name_long, diff --git a/tests/workflows/clem/test_process_raw_lifs.py b/tests/workflows/clem/test_process_raw_lifs.py index 32b573fc8..6ae414214 100644 --- a/tests/workflows/clem/test_process_raw_lifs.py +++ b/tests/workflows/clem/test_process_raw_lifs.py @@ -4,7 +4,7 @@ import pytest from murfey.server.ispyb import TransportManager -from murfey.workflows.clem.process_raw_lifs import zocalo_cluster_request +from murfey.workflows.clem.process_raw_lifs import run # Set up variables visit_name = "cm12345-6" @@ -29,7 +29,7 @@ def lif_file(raw_dir: Path): return file -def test_zocalo_cluster_request( +def test_run( lif_file: Path, raw_dir: Path, ): @@ -38,7 +38,7 @@ def test_zocalo_cluster_request( mock_transport.feedback_queue = feedback_queue # Run the function with the listed parameters - zocalo_cluster_request( + run( file=lif_file, root_folder=root_folder, session_id=session_id, diff --git a/tests/workflows/clem/test_process_raw_tiffs.py b/tests/workflows/clem/test_process_raw_tiffs.py index 6cc92bcf2..907a60ae6 100644 --- a/tests/workflows/clem/test_process_raw_tiffs.py +++ b/tests/workflows/clem/test_process_raw_tiffs.py @@ -4,7 +4,7 @@ import pytest from murfey.server.ispyb import TransportManager -from murfey.workflows.clem.process_raw_tiffs import zocalo_cluster_request +from murfey.workflows.clem.process_raw_tiffs import run # Set up variables session_id = 0 @@ -49,7 +49,7 @@ def metadata(raw_dir: Path): return metadata -def 
test_zocalo_cluster_request( +def test_run( tiff_list: list[Path], metadata: Path, raw_dir: Path, @@ -59,7 +59,7 @@ def test_zocalo_cluster_request( mock_transport.feedback_queue = feedback_queue # Run the function with the listed parameters - zocalo_cluster_request( + run( tiff_list=tiff_list, root_folder=root_folder, session_id=session_id, diff --git a/tests/workflows/clem/test_register_preprocessing_results.py b/tests/workflows/clem/test_register_preprocessing_results.py index b1fc1c0d9..a88b1adf7 100644 --- a/tests/workflows/clem/test_register_preprocessing_results.py +++ b/tests/workflows/clem/test_register_preprocessing_results.py @@ -254,7 +254,7 @@ def test_run( # Mock the align and merge workflow call mock_align_and_merge_call = mocker.patch( - "murfey.workflows.clem.register_preprocessing_results.submit_cluster_request" + "murfey.workflows.clem.register_preprocessing_results.run_align_and_merge" ) preprocessing_messages = generate_preprocessing_messages( @@ -340,7 +340,7 @@ def test_run_with_db( # Mock the align and merge workflow call mock_align_and_merge_call = mocker.patch( - "murfey.workflows.clem.register_preprocessing_results.submit_cluster_request" + "murfey.workflows.clem.register_preprocessing_results.run_align_and_merge" ) # Patch the TransportManager object in the workflows called From 4a4d8c56bbb2bdcea47289a9c71217fbac9f2e34 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 15:58:01 +0000 Subject: [PATCH 09/11] Forgot to replace 'series_name' with 'site_name' --- src/murfey/workflows/clem/register_align_and_merge_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py index bd39f5cb3..170d6f4b2 100644 --- a/src/murfey/workflows/clem/register_align_and_merge_results.py +++ b/src/murfey/workflows/clem/register_align_and_merge_results.py @@ -84,7 +84,7 @@ def run(message: dict, 
murfey_db: Session) -> dict[str, bool]: clem_img_series := murfey_db.exec( select(ImagingSite) .where(ImagingSite.session_id == session_id) - .where(ImagingSite.series_name == result.series_name) + .where(ImagingSite.site_name == result.series_name) ).one_or_none() ): clem_img_series = ImagingSite( From 9d5bc46dab435e0037801448a738f14eb2d5730e Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 16:18:09 +0000 Subject: [PATCH 10/11] Removed helper function for inserting CLEM database entries --- src/murfey/workflows/clem/__init__.py | 156 -------------------------- 1 file changed, 156 deletions(-) diff --git a/src/murfey/workflows/clem/__init__.py b/src/murfey/workflows/clem/__init__.py index d06f6fd85..e69de29bb 100644 --- a/src/murfey/workflows/clem/__init__.py +++ b/src/murfey/workflows/clem/__init__.py @@ -1,156 +0,0 @@ -from __future__ import annotations - -import logging -import re -from pathlib import Path -from typing import Optional, Type, Union - -from sqlalchemy.exc import NoResultFound -from sqlmodel import Session, select - -from murfey.util.config import get_machine_config -from murfey.util.db import ( - ImagingSite, - Session as MurfeySession, -) - -logger = logging.getLogger("murfey.workflows.clem") - - -""" -HELPER FUNCTIONS FOR CLEM DATABASE -""" - - -def _validate_and_sanitise( - file: Path, - session_id: int, - db: Session, -) -> Path: - """ - Performs validation and sanitisation on the incoming file paths, ensuring that - no forbidden characters are present and that the the path points only to allowed - sections of the file server. - - Returns the file path as a sanitised string that can be converted into a Path - object again. - - NOTE: Due to the instrument name query, 'db' now needs to be passed as an - explicit variable to this function from within a FastAPI endpoint, as using the - instance that was imported directly won't load it in the correct state. 
- """ - - valid_file_types = ( - ".lif", - ".tif", - ".tiff", - ".xlif", - ".xml", - ) - - # Resolve symlinks and directory changes to get full file path - full_path = Path(file).resolve() - - # Use machine configuration to validate which file base paths are accepted from - instrument_name = ( - db.exec(select(MurfeySession).where(MurfeySession.id == session_id)) - .one() - .instrument_name - ) - machine_config = get_machine_config(instrument_name=instrument_name)[ - instrument_name - ] - rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() - - # Check that full file path doesn't contain unallowed characters - # Currently allows only: - # - words (alphanumerics and "_"; \w), - # - spaces (\s), - # - periods, - # - dashes, - # - forward slashes ("/") - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path))) is False: - raise ValueError(f"Unallowed characters present in {file}") - - # Check that it's not accessing somehwere it's not allowed - if not str(full_path).startswith(str(rsync_basepath)): - raise ValueError(f"{file} points to a directory that is not permitted") - - # Check that it is of a permitted file type - if f"{full_path.suffix}" not in valid_file_types: - raise ValueError(f"{full_path.suffix} is not a permitted file format") - - return full_path - - -def get_db_entry( - db: Session, - # With the database search funcion having been moved out of the FastAPI - # endpoint, the database now has to be explicitly passed within the FastAPI - # endpoint function in order for it to be loaded in the correct state. - table: Type[Union[ImagingSite,]], - session_id: int, - file_path: Optional[Path] = None, - series_name: Optional[str] = None, -) -> Union[ImagingSite,]: - """ - Searches the CLEM workflow-related tables in the Murfey database for an entry that - matches the file path or series name within a given session. Returns the entry if - a match is found, otherwise register it as a new entry in the database. 
- """ - - # Validate that parameters are provided correctly - if file_path is None and series_name is None: - raise ValueError( - "One of either 'file_path' or 'series_name' has to be provided" - ) - if file_path is not None and series_name is not None: - raise ValueError("Only one of 'file_path' or 'series_name' should be provided") - - # Validate file path if provided - if file_path is not None: - try: - file_path = _validate_and_sanitise(file_path, session_id, db) - except Exception: - raise Exception - - # Validate series name to use - if series_name is not None: - if bool(re.fullmatch(r"^[\w\s\.\-/]+$", series_name)) is False: - raise ValueError("One or more characters in the string are not permitted") - - # Return database entry if it exists - try: - db_entry = ( - db.exec( - select(table) - .where(table.session_id == session_id) - .where(table.file_path == str(file_path)) - ).one() - if file_path is not None - else db.exec( - select(table) - .where(table.session_id == session_id) - .where(table.series_name == series_name) - ).one() - ) - # Create and register new entry if not present - except NoResultFound: - db_entry = ( - table( - file_path=str(file_path), - session_id=session_id, - ) - if file_path is not None - else table( - series_name=series_name, - session_id=session_id, - ) - ) - db.add(db_entry) - db.commit() - - except Exception: - raise Exception - - return db_entry From e92995aab917d60de56311f03ca9d66a3fbec9c9 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 19 Mar 2026 16:29:33 +0000 Subject: [PATCH 11/11] Fixed broken test --- .../workflows/clem/test_register_preprocessing_results.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/workflows/clem/test_register_preprocessing_results.py b/tests/workflows/clem/test_register_preprocessing_results.py index a88b1adf7..3a48669f6 100644 --- a/tests/workflows/clem/test_register_preprocessing_results.py +++ b/tests/workflows/clem/test_register_preprocessing_results.py @@ -330,14 +330,6 
@@ def test_run_with_db( return_value=ispyb_db_session, ) - # Mock out the machine config used in the helper sanitisation function - mock_get_machine_config = mocker.patch("murfey.workflows.clem.get_machine_config") - mock_machine_config = MagicMock() - mock_machine_config.rsync_basepath = rsync_basepath - mock_get_machine_config.return_value = { - ExampleVisit.instrument_name: mock_machine_config, - } - # Mock the align and merge workflow call mock_align_and_merge_call = mocker.patch( "murfey.workflows.clem.register_preprocessing_results.run_align_and_merge"