diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py index df6da6705..e95a2a9fe 100644 --- a/src/murfey/server/api/workflow.py +++ b/src/murfey/server/api/workflow.py @@ -58,7 +58,7 @@ TiltSeries, ) from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo -from murfey.util.processing_params import default_spa_parameters +from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc from murfey.util.tomo import midpoint logger = getLogger("murfey.server.api.workflow") @@ -365,29 +365,7 @@ async def request_spa_preprocessing( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - parts = [secure_filename(p) for p in Path(proc_file.path).parts] - visit_idx = parts.index(visit_name) - core = Path("/") / Path(*parts[: visit_idx + 1]) - ppath = Path("/") / Path(*parts) - sub_dataset = ppath.relative_to(core).parts[0] - extra_path = machine_config.processed_extra_directory - for i, p in enumerate(ppath.parts): - if p.startswith("raw"): - movies_path_index = i - break - else: - raise ValueError(f"{proc_file.path} does not contain a raw directory") - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / "/".join(ppath.parts[movies_path_index + 1 : -1]) - / str(ppath.stem + "_motion_corrected.mrc") - ) + mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config) try: collected_ids = db.exec( select(DataCollectionGroup, DataCollection, ProcessingJob, AutoProcProgram) @@ -491,7 +469,8 @@ async def request_spa_preprocessing( _transport_object.send("processing_recipe", zocalo_message) else: logger.error( - f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found" + f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} " + "but no Zocalo transport object was found" ) return proc_file @@ -646,22 +625,7 @@ async def request_tomography_preprocessing( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - visit_idx = Path(proc_file.path).parts.index(visit_name) - core = Path(*Path(proc_file.path).parts[: visit_idx + 1]) - ppath = Path("/".join(secure_filename(p) for p in Path(proc_file.path).parts)) - sub_dataset = "/".join(ppath.relative_to(core).parts[:-1]) - extra_path = machine_config.processed_extra_directory - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / str(ppath.stem + "_motion_corrected.mrc") - ) - mrc_out = Path("/".join(secure_filename(p) for p in mrc_out.parts)) + mrc_out = motion_corrected_mrc(Path(proc_file.path), visit_name, machine_config) recipe_name = machine_config.recipes.get("em-tomo-preprocess", "em-tomo-preprocess") @@ -732,7 +696,8 @@ async def request_tomography_preprocessing( _transport_object.send("processing_recipe", zocalo_message) else: logger.error( - f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found" + f"Pre-processing was requested for {sanitise(Path(proc_file.path).name)} " + f"but no Zocalo transport object was found" ) return proc_file else: diff --git a/src/murfey/server/feedback.py b/src/murfey/server/feedback.py index 8ef95bf6f..80ca02cbb 100644 --- a/src/murfey/server/feedback.py +++ b/src/murfey/server/feedback.py @@ -51,7 +51,7 @@ get_microscope, get_security_config, ) -from murfey.util.processing_params import default_spa_parameters +from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc from murfey.util.tomo import midpoint logger = logging.getLogger("murfey.server.feedback") @@ -106,37 +106,16 @@ def get_all_tilts(tilt_series_id: int) -> List[str]: ).all() if not complete_results: return [] + visit_name = complete_results[0][2].visit instrument_name = complete_results[0][2].instrument_name results = [r[0] for r in complete_results] machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - - def _mc_path(mov_path: Path) -> str: - for p in mov_path.parts: - if "-" in p and p.startswith(("bi", "nr", "nt", "cm", "sw")): - visit_name = p - break - else: - raise ValueError(f"No visit found in {mov_path}") - visit_idx = Path(mov_path).parts.index(visit_name) - core = Path(*Path(mov_path).parts[: visit_idx + 1]) - ppath = Path(mov_path) - sub_dataset = "/".join(ppath.relative_to(core).parts[:-1]) - extra_path = machine_config.processed_extra_directory - mrc_out = ( - core - / machine_config.processed_directory_name - / sub_dataset - / extra_path - / "MotionCorr" - / "job002" - / "Movies" - / str(ppath.stem + "_motion_corrected.mrc") - ) - return str(mrc_out) - - return [_mc_path(Path(r.movie_path)) for r in results] + return [ + str(motion_corrected_mrc(Path(r.movie_path), visit_name, machine_config)) + for r in results + ] def get_job_ids(tilt_series_id: int, appid: int) -> JobIDs: diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 31bb380fb..9b43fc74b 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -57,6 +57,7 @@ class MachineConfig(BaseModel): # type: ignore processing_enabled: bool = True process_by_default: bool = True gain_directory_name: str = "processing" + process_multiple_datasets: bool = True processed_directory_name: str = "processed" processed_extra_directory: str = "" recipes: dict[str, str] = { diff --git a/src/murfey/util/processing_params.py b/src/murfey/util/processing_params.py index 66ea481c0..51638981c 100644 --- a/src/murfey/util/processing_params.py +++ b/src/murfey/util/processing_params.py @@ -1,6 +1,36 @@ +from pathlib import Path from typing import Literal, Optional from pydantic import BaseModel +from werkzeug.utils import secure_filename + +from murfey.util.config import MachineConfig + + +def motion_corrected_mrc( + input_movie: Path, visit_name: str, machine_config: MachineConfig +): + parts = [secure_filename(p) for p in input_movie.parts] + visit_idx = parts.index(visit_name) + core = Path("/") / Path(*parts[: visit_idx + 1]) + ppath = Path("/") / Path(*parts) + if machine_config.process_multiple_datasets: + sub_dataset = ppath.relative_to(core).parts[0] + else: + sub_dataset = "" + extra_path = machine_config.processed_extra_directory + mrc_out = ( + core + / machine_config.processed_directory_name + / sub_dataset + / extra_path + / "MotionCorr" + / "job002" + / "Movies" + / ppath.parent.relative_to(core / sub_dataset) + / str(ppath.stem + "_motion_corrected.mrc") + ) + return Path("/".join(secure_filename(p) for p in mrc_out.parts)) class CLEMAlignAndMergeParameters(BaseModel):