From f819f4c881487b9810afb9dfe44276665540faad Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 30 May 2025 13:24:06 +0100 Subject: [PATCH 1/5] Make the raw directory optional when doing SPA processing --- src/murfey/server/api/workflow.py | 13 +++++-------- src/murfey/util/config.py | 1 + 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py index df6da6705..90f0601dc 100644 --- a/src/murfey/server/api/workflow.py +++ b/src/murfey/server/api/workflow.py @@ -369,14 +369,11 @@ async def request_spa_preprocessing( visit_idx = parts.index(visit_name) core = Path("/") / Path(*parts[: visit_idx + 1]) ppath = Path("/") / Path(*parts) - sub_dataset = ppath.relative_to(core).parts[0] - extra_path = machine_config.processed_extra_directory - for i, p in enumerate(ppath.parts): - if p.startswith("raw"): - movies_path_index = i - break + if machine_config.process_multiple_datasets: + sub_dataset = ppath.relative_to(core).parts[0] else: - raise ValueError(f"{proc_file.path} does not contain a raw directory") + sub_dataset = "" + extra_path = machine_config.processed_extra_directory mrc_out = ( core / machine_config.processed_directory_name @@ -385,7 +382,7 @@ async def request_spa_preprocessing( / "MotionCorr" / "job002" / "Movies" - / "/".join(ppath.parts[movies_path_index + 1 : -1]) + / ppath.parent.relative_to(core / sub_dataset) / str(ppath.stem + "_motion_corrected.mrc") ) try: diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 31bb380fb..9b43fc74b 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -57,6 +57,7 @@ class MachineConfig(BaseModel): # type: ignore processing_enabled: bool = True process_by_default: bool = True gain_directory_name: str = "processing" + process_multiple_datasets: bool = True processed_directory_name: str = "processed" processed_extra_directory: str = "" recipes: dict[str, str] = { From d63b6da850840eea8b847f7166d05f700b14af0a Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 30 May 2025 13:30:09 +0100 Subject: [PATCH 2/5] Consistency between tomo and spa path construction --- src/murfey/server/api/workflow.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py index 90f0601dc..29269454f 100644 --- a/src/murfey/server/api/workflow.py +++ b/src/murfey/server/api/workflow.py @@ -643,10 +643,14 @@ async def request_tomography_preprocessing( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - visit_idx = Path(proc_file.path).parts.index(visit_name) - core = Path(*Path(proc_file.path).parts[: visit_idx + 1]) - ppath = Path("/".join(secure_filename(p) for p in Path(proc_file.path).parts)) - sub_dataset = "/".join(ppath.relative_to(core).parts[:-1]) + parts = [secure_filename(p) for p in Path(proc_file.path).parts] + visit_idx = parts.index(visit_name) + core = Path("/") / Path(*parts[: visit_idx + 1]) + ppath = Path("/") / Path(*parts) + if machine_config.process_multiple_datasets: + sub_dataset = ppath.relative_to(core).parts[0] + else: + sub_dataset = "" extra_path = machine_config.processed_extra_directory mrc_out = ( core From a7215c0248ba8a6d6016a79dc02b0b819e8e5a76 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 30 May 2025 13:39:55 +0100 Subject: [PATCH 3/5] Make those a common function --- src/murfey/server/api/workflow.py | 50 ++++------------------------ src/murfey/util/processing_params.py | 30 +++++++++++++++++ 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py index 29269454f..e95a2a9fe 100644 --- a/src/murfey/server/api/workflow.py +++ b/src/murfey/server/api/workflow.py @@ -58,7 +58,7 @@ TiltSeries, ) from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo -from murfey.util.processing_params import default_spa_parameters +from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc from murfey.util.tomo import midpoint logger = getLogger("murfey.server.api.workflow") @@ -365,26 +365,7 @@ async def request_spa_preprocessing( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - parts = [secure_filename(p) for p in Path(proc_file.path).parts] - visit_idx = parts.index(visit_name) - core = Path("/") / Path(*parts[: visit_idx + 1]) - ppath = Path("/") / Path(*parts) - if machine_config.process_multiple_datasets: - sub_dataset = ppath.relative_to(core).parts[0] - else: - sub_dataset = "" - extra_path = machine_config.processed_extra_directory - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / ppath.parent.relative_to(core / sub_dataset) - / str(ppath.stem + "_motion_corrected.mrc") - ) + mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config) try: collected_ids = db.exec( select(DataCollectionGroup, DataCollection, ProcessingJob, AutoProcProgram) @@ -488,7 +469,8 @@ async def request_spa_preprocessing( _transport_object.send("processing_recipe", zocalo_message) else: logger.error( - f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found" + f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} " + "but no Zocalo transport object was found" ) return proc_file @@ -643,26 +625,7 @@ async def request_tomography_preprocessing( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - parts = [secure_filename(p) for p in Path(proc_file.path).parts] - visit_idx = parts.index(visit_name) - core = Path("/") / Path(*parts[: visit_idx + 1]) - ppath = Path("/") / Path(*parts) - if machine_config.process_multiple_datasets: - sub_dataset = ppath.relative_to(core).parts[0] - else: - sub_dataset = "" - extra_path = machine_config.processed_extra_directory - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / str(ppath.stem + "_motion_corrected.mrc") - ) - mrc_out = Path("/".join(secure_filename(p) for p in mrc_out.parts)) + mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config) recipe_name = machine_config.recipes.get("em-tomo-preprocess", "em-tomo-preprocess") @@ -733,7 +696,8 @@ async def request_tomography_preprocessing( _transport_object.send("processing_recipe", zocalo_message) else: logger.error( - f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found" + f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} " + f"but no Zocalo transport object was found" ) return proc_file else: diff --git a/src/murfey/util/processing_params.py b/src/murfey/util/processing_params.py index 66ea481c0..51638981c 100644 --- a/src/murfey/util/processing_params.py +++ b/src/murfey/util/processing_params.py @@ -1,6 +1,36 @@ +from pathlib import Path from typing import Literal, Optional from pydantic import BaseModel +from werkzeug.utils import secure_filename + +from murfey.util.config import MachineConfig + + +def motion_corrected_mrc( + input_movie: Path, visit_name: str, machine_config: MachineConfig +): + parts = [secure_filename(p) for p in input_movie.parts] + visit_idx = parts.index(visit_name) + core = Path("/") / Path(*parts[: visit_idx + 1]) + ppath = Path("/") / Path(*parts) + if machine_config.process_multiple_datasets: + sub_dataset = ppath.relative_to(core).parts[0] + else: + sub_dataset = "" + extra_path = machine_config.processed_extra_directory + mrc_out = ( + core + / machine_config.processed_directory_name + / sub_dataset + / extra_path + / "MotionCorr" + / "job002" + / "Movies" + / ppath.parent.relative_to(core / sub_dataset) + / str(ppath.stem + "_motion_corrected.mrc") + ) + return Path("/".join(secure_filename(p) for p in mrc_out.parts)) class CLEMAlignAndMergeParameters(BaseModel): From 366790cf8d9a84513edac2085d05a16de13e5062 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 2 Jun 2025 10:35:45 +0100 Subject: [PATCH 4/5] Use mrc path function for tilts in tilt series --- src/murfey/server/feedback.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/murfey/server/feedback.py b/src/murfey/server/feedback.py index 8ef95bf6f..66c652f33 100644 --- a/src/murfey/server/feedback.py +++ b/src/murfey/server/feedback.py @@ -51,7 +51,7 @@ get_microscope, get_security_config, ) -from murfey.util.processing_params import default_spa_parameters +from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc from murfey.util.tomo import midpoint logger = logging.getLogger("murfey.server.feedback") @@ -106,37 +106,16 @@ def get_all_tilts(tilt_series_id: int) -> List[str]: ).all() if not complete_results: return [] + visit_name = complete_results[0][2].visit instrument_name = complete_results[0][2].instrument_name results = [r[0] for r in complete_results] machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - - def _mc_path(mov_path: Path) -> str: - for p in mov_path.parts: - if "-" in p and p.startswith(("bi", "nr", "nt", "cm", "sw")): - visit_name = p - break - else: - raise ValueError(f"No visit found in {mov_path}") - visit_idx = Path(mov_path).parts.index(visit_name) - core = Path(*Path(mov_path).parts[: visit_idx + 1]) - ppath = Path(mov_path) - sub_dataset = "/".join(ppath.relative_to(core).parts[:-1]) - extra_path = machine_config.processed_extra_directory - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / str(ppath.stem + "_motion_corrected.mrc") - ) - return str(mrc_out) - - return [_mc_path(Path(r.movie_path)) for r in results] + return [ + motion_corrected_mrc(Path(r.movie_path), visit_name, machine_config) + for r in results + ] def get_job_ids(tilt_series_id: int, appid: int) -> JobIDs: From f491f069c41b519d8c49876ca6c79cbd2abe40f7 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 2 Jun 2025 11:45:53 +0100 Subject: [PATCH 5/5] Should be str not path --- src/murfey/server/feedback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/server/feedback.py b/src/murfey/server/feedback.py index 66c652f33..80ca02cbb 100644 --- a/src/murfey/server/feedback.py +++ b/src/murfey/server/feedback.py @@ -113,7 +113,7 @@ def get_all_tilts(tilt_series_id: int) -> List[str]: instrument_name ] return [ - motion_corrected_mrc(Path(r.movie_path), visit_name, machine_config) + str(motion_corrected_mrc(Path(r.movie_path), visit_name, machine_config)) for r in results ]