diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4a7121d..d24dc05 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Create latest tag uses: Actions-R-Us/actions-tagger@latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 307ea52..b4acccf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,8 +40,8 @@ jobs: - name: Run Unit Tests run: | REMOTE=/app/nf-encyclopedia - docker run -v $(pwd):${REMOTE} -w ${REMOTE} nf-encyclopedia:latest \ - pytest tests/unit_tests + docker run -v $(pwd):${REMOTE} nf-encyclopedia:latest \ + pytest ${REMOTE}/tests/unit_tests - name: Run System Tests run: | diff --git a/.gitignore b/.gitignore index fc0d425..e8f2d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .nextflow +venv .Rhistory .idea/** .gradle diff --git a/Dockerfile b/Dockerfile index 61ff417..ad7d099 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,66 +1,77 @@ FROM --platform=linux/amd64 mambaorg/micromamba:latest as micromamba -# First Stage of the build, gets the jar for encyclopedia -FROM --platform=linux/amd64 openjdk:8-jre +# FROM --platform=linux/amd64 ibmjava:11 +# FROM --platform=linux/amd64 amazoncorretto:11-al2023-headless # Uses yum for package management. 
+FROM --platform=linux/amd64 nextflow/nextflow:23.04.2 + ARG VERSION=2.12.30 ENV VERSION ${VERSION} +LABEL authors="wfondrie@talus.bio" \ + description="Docker image for most of nf-encyclopedia" + + +# Install procps so that Nextflow can poll CPU usage and +# deep clean the apt cache to reduce image/layer size +# RUN apt-get install -y procps sqlite3 libgomp1 \ +# && apt-get clean -y && rm -rf /var/lib/apt/lists/* +RUN yum install -y wget -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install libgomp1 && \ - apt-get clean WORKDIR /code +# First Stage of the build, gets the jar for encyclopedia RUN wget https://bitbucket.org/searleb/encyclopedia/downloads/encyclopedia-${VERSION}-executable.jar -WORKDIR /app -LABEL authors="wfondrie@talus.bio" \ - description="Docker image for most of nf-encyclopedia" +# # Install nextflow +# RUN wget -qO- https://get.nextflow.io | bash +# RUN chmod +x nextflow +# RUN mv nextflow /usr/local/bin/. -# Install procps so that Nextflow can poll CPU usage and -# deep clean the apt cache to reduce image/layer size -RUN apt-get update \ - && apt-get install -y procps sqlite3 \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* +WORKDIR /app # Setup micromamba: -ARG MAMBA_USER=mamba +ARG MAMBA_USER=root ARG MAMBA_USER_ID=1000 ARG MAMBA_USER_GID=1000 +ARG MAMBA_DOCKERFILE_ACTIVATE=1 ENV MAMBA_USER=$MAMBA_USER ENV MAMBA_ROOT_PREFIX="/opt/conda" ENV MAMBA_EXE="/bin/micromamba" +RUN mkdir -p ${MAMBA_ROOT_PREFIX} COPY --from=micromamba "$MAMBA_EXE" "$MAMBA_EXE" COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh COPY --from=micromamba /usr/local/bin/_dockerfile_shell.sh /usr/local/bin/_dockerfile_shell.sh -COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/_entrypoint.sh +COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/mamba_entrypoint.sh COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh 
COPY --from=micromamba /usr/local/bin/_dockerfile_initialize_user_accounts.sh /usr/local/bin/_dockerfile_initialize_user_accounts.sh COPY --from=micromamba /usr/local/bin/_dockerfile_setup_root_prefix.sh /usr/local/bin/_dockerfile_setup_root_prefix.sh -RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && \ - /usr/local/bin/_dockerfile_setup_root_prefix.sh +# No need to set up accounts if we will run as root ... +# RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && + +RUN /usr/local/bin/_dockerfile_setup_root_prefix.sh # Setup the environment -USER root +USER $MAMBA_USER COPY environment.yml /tmp/environment.yml # Instruct R processes to use these empty files instead of # clashing with a local one RUN touch .Rprofile .Renviron -# Create the environment -RUN micromamba install -y -n base -f /tmp/environment.yml && \ - micromamba clean --all --yes +# Set the path. NextFlow seems to circumvent the conda environment +# We also need to set options for the JRE here. +ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH:/bin" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION # Setup the EncyclopeDIA executable: RUN ln -s /code/encyclopedia-$VERSION-executable.jar /code/encyclopedia.jar -# Set the path. NextFlow seems to circumvent the conda environment -# We also need to set options for the JRE here. 
-ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION - # Create the entrypoint: -SHELL ["/usr/local/bin/_dockerfile_shell.sh"] -ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] -CMD [] +# SHELL ["/usr/local/bin/_dockerfile_shell.sh"] +# ENTRYPOINT ["/usr/local/bin/mamba_entrypoint.sh", "/usr/local/bin/entry.sh"] + +# Create the environment +RUN micromamba install -y -n base -f /tmp/environment.yml && \ + micromamba clean --all --yes + +CMD ["/bin/bash"] + diff --git a/assets/template.sky.zip b/assets/template.sky.zip new file mode 100644 index 0000000..60e729a Binary files /dev/null and b/assets/template.sky.zip differ diff --git a/environment.yml b/environment.yml index e8c1b41..c787b45 100644 --- a/environment.yml +++ b/environment.yml @@ -8,9 +8,9 @@ dependencies: - pytest - numpy - pandas - - bioconductor-msstats=4.2.0 - - r-rlang - - r-dplyr - - r-tidyr - - r-magrittr - - r-stringr + - bioconda::bioconductor-msstats + - conda-forge::r-rlang + - conda-forge::r-dplyr + - conda-forge::r-tidyr + - r::r-magrittr + - r::r-stringr diff --git a/main.nf b/main.nf index 3b2be0a..11a73d3 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl = 2 // Subworkflows -include { CONVERT_TO_MZML } from "./subworkflows/msconvert" +include { CONVERT_TO_MZML } from "./subworkflows/convert" include { BUILD_CHROMATOGRAM_LIBRARY; PERFORM_QUANT; @@ -12,6 +12,12 @@ include { // Modules include { MSSTATS } from "./modules/msstats" +include { ADD_IMS_INFO } from "./modules/ims" +include { + SKYLINE_ADD_LIB; + SKYLINE_IMPORT_DATA; + SKYLINE_MERGE_RESULTS +} from "./modules/skyline" // @@ -49,6 +55,7 @@ workflow { input = file(params.input, checkIfExists: true) fasta = file(params.fasta, checkIfExists: true) dlib = file(params.dlib, checkIfExists: true) + skyline_empty_template = file("$baseDir/assets/template.sky.zip", checkIfExists: true) // Optional contrasts arg: if ( params.contrasts != null ) { @@ -74,6 +81,16 @@ workflow { 
error "No MS data files were provided. Nothing to do." } + // Raw Mass Spec files (vendor .raw files or tarred Bruker .d directories, i.e. .d.tar) + // These files will be used later to quant using skyline. + // This also filters out files that are chromatogram libraries + ms_files.runs + | join(ms_files.meta) + | filter { !it[1] } + | map { it[0] } + | filter( ~/^.*((\.raw)|(\.d\.tar))$/ ) + | set { raw_quant_files } + // Convert raw files to gzipped mzML and group them by experiment. // The chrlib and quant channels take the following form: // [[file_ids], [mzml_gz_files], is_chrlib, group] @@ -107,13 +124,42 @@ workflow { PERFORM_QUANT(quant_files, dlib, fasta, params.aggregate) | set { quant_results } + quant_results.local + | map { it[0] } + | set { groups } + + // Add IMS info to the blib + ADD_IMS_INFO(groups, quant_results.blib) + | set { blib } + + SKYLINE_ADD_LIB(skyline_empty_template, fasta, blib) + | set { skyline_template_zipfile } + + // This will generate a skyd for every raw data file + SKYLINE_IMPORT_DATA( + skyline_template_zipfile.skyline_zipfile, + raw_quant_files, + ) + | set { skyline_import_results } + + SKYLINE_MERGE_RESULTS( + skyline_template_zipfile.skyline_zipfile, + skyline_import_results.skyd_file.collect(), + raw_quant_files.collect(), + ) + | set { skyline_merge_results } + + skyline_merge_results.final_skyline_zipfile.view() + // Perform an aggregate analysis on all files if needed: if ( params.aggregate ) { // Aggregate quantitative runs with EncyclopeDIA.
// The output has one channel: - // global -> [agg_name, peptides, proteins] or null + // global -> [agg_name, peptides_txt, proteins_txt] or null + // lib -> blib PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta) | set { enc_results } + } else { quant_results | set{ enc_results } } @@ -122,6 +168,8 @@ workflow { if ( params.msstats.enabled ) { MSSTATS(enc_results.global, input, contrasts) } + + // } diff --git a/modules/convert.nf b/modules/convert.nf new file mode 100644 index 0000000..3e41acd --- /dev/null +++ b/modules/convert.nf @@ -0,0 +1,61 @@ +process MSCONVERT { + publishDir "${params.mzml_dir}/${outputDir}", failOnError: true + label 'process_low_constant' + label 'error_retry' + + input: + tuple val(file_id), path(raw_input), val(outputDir) + + output: + tuple val(file_id), path("${raw_input.baseName}.mzML.gz") + + script: + """ + wine msconvert \\ + -v \\ + --gzip \\ + --mzML \\ + --64 \\ + --zlib \\ + --filter "peakPicking true 1-" \\ + ${params.msconvert.demultiplex ? '--filter "demultiplex optimization=overlap_only"' : ''} \\ + ${raw_input} + """ + + stub: + """ + touch ${raw_input.baseName}.mzML.gz + """ +} + + +process TDF2MZML { + publishDir "${params.mzml_dir}/${outputDir}", pattern: "*.mzML.gz", failOnError: true + container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use... + label 'process_single' + label 'error_retry' + + input: + tuple val(file_id), path(tdf_input), val(outputDir) + + output: + tuple val(file_id), path("${file(tdf_input.baseName).baseName}.mzML.gz") + + script: + """ + echo "Unpacking..." + tar -xvf ${tdf_input} + + echo "Converting..." + tdf2mzml.py -i *.d # --ms1_type "centroid" + + echo "Compressing..." 
+ mv *.mzml ${file(tdf_input.baseName).baseName}.mzML + gzip ${file(tdf_input.baseName).baseName}.mzML + """ + + stub: + """ + touch ${file(tdf_input.baseName).baseName}.mzML.gz + """ +} \ No newline at end of file diff --git a/modules/encyclopedia.nf b/modules/encyclopedia.nf index 01bd427..b2e3dc8 100644 --- a/modules/encyclopedia.nf +++ b/modules/encyclopedia.nf @@ -23,7 +23,7 @@ def stem(suffix) { process ENCYCLOPEDIA_SEARCH { publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true publishDir "${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true - label 'process_medium' + label 'process_high' input: tuple val(group), path(mzml_gz_file) @@ -70,6 +70,7 @@ process ENCYCLOPEDIA_SEARCH { process ENCYCLOPEDIA_AGGREGATE { publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true + publishDir "${params.result_dir}/${group}/blib", pattern: '*.blib', failOnError: true publishDir "${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true publishDir "${params.result_dir}/${group}/results", pattern: '*.txt', failOnError: true publishDir "${params.result_dir}/${group}/reports", pattern: '*.csv', failOnError: true @@ -92,6 +93,7 @@ process ENCYCLOPEDIA_AGGREGATE { tuple( val(group), path("${stem(output_suffix)}.elib"), + path("${stem(output_suffix)}.blib"), path("${stem(output_suffix)}.global.log"), path("${output_suffix}_detection_summary.csv"), emit: "lib" @@ -122,6 +124,9 @@ process ENCYCLOPEDIA_AGGREGATE { -a ${align} \\ | tee ${stem(output_suffix)}.global.log + ${execEncyclopedia(task.memory)} \\ + -convert -libtoblib -i ${stem(output_suffix)}.elib + # Better file names: if [ "${align}" = true ]; then mv ${stem(output_suffix)}.elib.peptides.txt ${stem(output_suffix)}.peptides.txt @@ -140,6 +145,7 @@ process ENCYCLOPEDIA_AGGREGATE { stub: """ touch ${stem(output_suffix)}.elib + touch ${stem(output_suffix)}.blib if [ "${align}" = true ]; then touch 
${stem(output_suffix)}.peptides.txt diff --git a/modules/ims.nf b/modules/ims.nf new file mode 100644 index 0000000..7e840cf --- /dev/null +++ b/modules/ims.nf @@ -0,0 +1,24 @@ + +process ADD_IMS_INFO { + publishDir "${params.result_dir}/${group}/blib", pattern: '*.ims.blib', failOnError: true + label 'process_medium' + container 'ghcr.io/talusbio/flimsay:v0.4.0' + + input: + val group + path blib + + output: + path("*.ims.blib"), emit: blib + + script: + """ + flimsay fill_blib ${blib} blib.ims.blib + """ + + stub: + """ + echo "${blib}" + touch blib.ims.blib + """ +} diff --git a/modules/msconvert.nf b/modules/msconvert.nf deleted file mode 100644 index b3430c3..0000000 --- a/modules/msconvert.nf +++ /dev/null @@ -1,29 +0,0 @@ -process MSCONVERT { - publishDir "${params.mzml_dir}/${outputDir}", failOnError: true - label 'process_low_constant' - label 'error_retry' - - input: - tuple val(file_id), path(raw_input), val(outputDir) - - output: - tuple val(file_id), path("${raw_input.baseName}.mzML.gz") - - script: - """ - wine msconvert \\ - -v \\ - --gzip \\ - --mzML \\ - --64 \\ - --zlib \\ - --filter "peakPicking true 1-" \\ - ${params.msconvert.demultiplex ? 
'--filter "demultiplex optimization=overlap_only"' : ''} \\ - ${raw_input} - """ - - stub: - """ - touch ${raw_input.baseName}.mzML.gz - """ -} diff --git a/modules/skyline.nf b/modules/skyline.nf new file mode 100644 index 0000000..3e76334 --- /dev/null +++ b/modules/skyline.nf @@ -0,0 +1,188 @@ + +// Code modified from https://github.com/mriffle/nf-teirex-dia/blob/main/modules/skyline.nf + +process SKYLINE_ADD_LIB { + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' + label 'process_medium' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + input: + path skyline_template_zipfile + path fasta + path blib + + output: + path("results.sky.zip"), emit: skyline_zipfile + path("skyline_add_library.log"), emit: log + + script: + """ + unzip ${skyline_template_zipfile} + + wine SkylineCmd \ + --in="${skyline_template_zipfile.baseName}" \ + --import-fasta="${fasta}" \ + --add-library-path="${blib}" \ + --out="results.sky" \ + --save \ + --share-zip="results.sky.zip" \ + --share-type="complete" \ + --tran-precursor-ion-charges="2, 3 ,4" \ + --tran-product-ion-charges="1,2" \ + --tran-product-ion-types="b, y, p" \ + --tran-use-dia-window-exclusion \ + --library-pick-product-ions="all_plus" \ + --tran-product-start-ion="ion 2" \ + --tran-product-end-ion="last ion - 2" \ + --associate-proteins-minimal-protein-list \ + --associate-proteins-group-proteins \ + --full-scan-product-res=10.0 \ + --full-scan-product-analyzer=centroided \ + --full-scan-rt-filter-tolerance=2 \ + --ims-library-res=30 \ + --decoys-add=shuffle \ + --timestamp \ + --memstamp \ + 2>&1 | tee skyline_add_library.log \ + """ + + stub: + """ + echo "${skyline_template_zipfile}" + echo "${fasta}" + echo "${blib}" + touch skyline_add_library.log + touch results.sky.zip + """ +} + +process SKYLINE_IMPORT_DATA { + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' + label 'process_medium' + label 
'process_high_memory' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + input: + path skyline_zipfile + each path(raw_file) + + output: + path("*.skyd"), emit: skyd_file + path("${raw_file.baseName}.log"), emit: log_file + + script: + """ + unzip ${skyline_zipfile} + if [[ ${raw_file} == *.d.tar ]] ; then + tar -xvf ${raw_file} + local_rawfile=\$(find \${PWD} -type d -name "*.d") + import_extra_params=" --full-scan-isolation-scheme=\${local_rawfile}" + else + local_rawfile=${raw_file} + import_extra_params=" --full-scan-isolation-scheme=\${local_rawfile}" + fi + + wine SkylineCmd \ + --in="${skyline_zipfile.baseName}" \ + --import-no-join \ + --import-file="\${local_rawfile}" \ + --full-scan-acquisition-method="DIA" \ + --timestamp \ + --memstamp \ + \${import_extra_params} \ + 2>&1 | tee "${raw_file.baseName}.log" + """ + + stub: + """ + echo "${skyline_zipfile}" + echo "${raw_file}" + touch ${raw_file.baseName}.skyd + touch ${raw_file.baseName}.log + """ +} + +process SKYLINE_MERGE_RESULTS { + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' + label 'process_high' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + input: + path skyline_zipfile + path '*.skyd' + path raw_files + + output: + path("final.sky.zip"), emit: final_skyline_zipfile + path("skyline-merge.log"), emit: log + + script: + """ + echo "Input Raw files" + echo ${raw_files} + + echo "Directory status >>>>" + ls -lctha # For debugging ...
+ + echo "Unzipping skyline template file" + unzip ${skyline_zipfile} + + import_files_params="" + import_extra_params="" + + if [[ \$(find \${PWD} -type f -name "*.d.tar") ]] ; then + for f in *.d.tar ; do + echo "Decompressing \${f}" + tar -xvf \${f} + done + else + echo "No compressed .d files found" + fi + + if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then + for f in *.d ; do + import_files_params=" \${import_files_params} --import-file=\${f}" + import_extra_params=" --full-scan-isolation-scheme=\${f}" + done + fi + + echo "Import file params >>>" + echo \${import_files_params} + + for ftype in raw mzml mzML; do + echo ">>> Looking for \${ftype} files" + for f in \$(find \${PWD} -type f -name "*.\${ftype}"); do + import_files_params=" \${import_files_params} --import-file=\${f}" + done + done + + echo "Import file params >>>" + echo \${import_files_params} + + wine SkylineCmd \ + --in="${skyline_zipfile.baseName}" \ + \${import_files_params} \ + --out="final.sky" \ + --save \ + --share-zip="final.sky.zip" \ + --share-type="complete" \ + --reintegrate-model-name="reintegration_res" \ + --reintegrate-create-model \ + --timestamp \ + --memstamp \ + \${import_extra_params} \ + 2>&1 | tee "skyline-merge.log" + + echo "Directory status >>>>" + ls -lctha # For debugging ... + """ + + stub: + """ + touch skyline-merge.log + touch final.sky.zip + """ +} diff --git a/nextflow.config b/nextflow.config index 0acaf12..c952eda 100644 --- a/nextflow.config +++ b/nextflow.config @@ -88,7 +88,7 @@ params { * The default attempts to match the defaults from the graphical * user interface. */ - encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window' + encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window -minLength 7 -maxLength 40 -minIntensity 50 -filterPeaklists true ' /** Additional command line arguments to use when searching files. 
*/ encyclopedia.local.args = '' diff --git a/subworkflows/msconvert.nf b/subworkflows/convert.nf similarity index 68% rename from subworkflows/msconvert.nf rename to subworkflows/convert.nf index 1d889d7..8cd1e30 100644 --- a/subworkflows/msconvert.nf +++ b/subworkflows/convert.nf @@ -1,6 +1,7 @@ -include { MSCONVERT } from "../modules/msconvert.nf" +include { MSCONVERT; TDF2MZML } from "../modules/convert" + workflow CONVERT_TO_MZML { take: raw_files @@ -21,7 +22,17 @@ workflow CONVERT_TO_MZML { } | set { staging } - MSCONVERT(staging.mzml_absent) + staging.mzml_absent + | branch { + is_tdf: it[0].toLowerCase().endsWith(".d.tar") + return it + is_raw: true + return it + } + |set { to_convert } + + MSCONVERT(to_convert.is_raw) + | concat(TDF2MZML(to_convert.is_tdf)) | concat(staging.is_mzml) | concat(staging.mzml_present) | set { results } diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index fda0e44..dae9a65 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -41,7 +41,7 @@ workflow BUILD_CHROMATOGRAM_LIBRARY { params.encyclopedia.chrlib_suffix, false // Don't align RTs ).lib - | map { it -> tuple it[0], it[1] } + | map { eait -> tuple eait[0], eait[1] } | set { output_elib } emit: @@ -61,9 +61,9 @@ workflow PERFORM_QUANT { // output is [group, mzml_gz_file, elib] quant_files | transpose() - | multiMap { it -> - mzml: tuple it[0], it[1] - elib: it[2] + | multiMap { qit -> + mzml: tuple qit[0], qit[1] + elib: qit[2] } | set { ungrouped_files } @@ -88,22 +88,33 @@ workflow PERFORM_QUANT { if ( local_only ) { Channel.empty() | set { global_files } Channel.empty() | set { msstats_files } + Channel.empty() | set { global_blib } } else { // Do the global analysis - // Output is [group, peptides_txt, proteins_txt] + // Output is [ + // lib: [group, elib, blib, log, summary.csv] + // quant: [group, peptides_txt, proteins_txt] + // ] ENCYCLOPEDIA_AGGREGATE( - local_files, - dlib, - fasta, - 
params.encyclopedia.quant_suffix, - true // Align RTs - ).quant - | set { global_files } + local_files, + dlib, + fasta, + params.encyclopedia.quant_suffix, + true // Align RTs + ) + | set { agg_outs } + + agg_outs.lib | + map { libe -> libe[2] } | + set { global_blib } + agg_outs.quant + | set { global_files } } emit: local = local_files global = global_files + blib = global_blib } @@ -136,9 +147,18 @@ workflow PERFORM_AGGREGATE_QUANT { fasta, params.encyclopedia.quant_suffix, true // Align RTs - ).quant + ) + | set { agg_results } + + agg_results.quant | set { global_files } + // Lib is ['aggregated', .elib, .blib, .log, summary.csv]; index 2 is the .blib + agg_results.lib + | map { libe -> libe[2] } + | set { blib } + emit: global = global_files + blib = blib } diff --git a/tests/conftest.py b/tests/conftest.py index 06ff8c6..8718087 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,18 +14,31 @@ def base_project(tmp_path): raw_dir = tmp_path / "subdir" raw_dir.mkdir() raw_files = [raw_dir / f"{f}.raw" for f in "abcdefghijklm"] + dot_d_files = [raw_dir / f"{f}.d.tar" for f in "opqrs"] for raw_file in raw_files: raw_file.touch() + for raw_file in dot_d_files: + raw_file.touch() mzml_file = raw_dir / "n.mzML.gz" mzml_file.touch() raw_files.append(mzml_file) + raw_files.extend(dot_d_files) chrlibs = ["true"] * 6 + ["false"] * 8 - groups = "xyz" * 4 + "z" * 2 + chrlibs.extend(["false"] * len(dot_d_files)) + groups = "xyz" * 4 + "z" * 2 + "" * 8 + "w" * len(dot_d_files) # create an input csv ms_files = ["file,chrlib,group"] + + # This assertion makes sure we are defining the + # csv correctly and does not get prematurely terminated + # since zip does not check that all arguments are the same + # length.
+ assert len(raw_files) == len(chrlibs) + assert len(raw_files) == len(groups) + for row in zip(raw_files, chrlibs, groups): row = list(row) row[0] = str(row[0]) @@ -34,6 +47,9 @@ def base_project(tmp_path): ms_files_csv = tmp_path / "ms_files.csv" with ms_files_csv.open("w+") as fhndl: fhndl.write("\n".join(ms_files) + "\n") + + # print the file + print(ms_files_csv.read_text()) ms_files_csv_short = tmp_path / "ms_files_short.csv" with ms_files_csv_short.open("w+") as fhndl: diff --git a/tests/system_tests/test_stubs.py b/tests/system_tests/test_stubs.py index ff318ab..f08e9f1 100644 --- a/tests/system_tests/test_stubs.py +++ b/tests/system_tests/test_stubs.py @@ -9,6 +9,7 @@ def test_no_groups(base_project, tmp_path): config, input_csv, _ = base_project pd.read_csv(input_csv).drop(columns="group").to_csv(input_csv, index=False) cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" @@ -19,6 +20,7 @@ def test_no_aggregate(base_project, tmp_path): """Test the workflow logic for per experiment workflows""" config, *_ = base_project cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" expected = [ @@ -45,16 +47,22 @@ def test_aggregate(base_project, tmp_path): """Test workflow logic for global analyses.""" config, *_ = base_project + template_file = tmp_path / "template.sky.zip" + template_file.touch() + cmd = [ "nextflow", "run", "main.nf", "--aggregate", "true", + "--skyline_template", + str(template_file), ] cmd += config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" not_expected = [ @@ -80,6 +88,7 @@ def test_already_converted(base_project, tmp_path): config, *_ = base_project cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) assert old == mzml.stat() @@ -96,6 +105,7 @@ def test_force_convert(base_project, tmp_path): config, *_ = base_project cmd = ["nextflow", "run", 
"main.nf", "--msconvert.force", "true"] + config + print(cmd) subprocess.run(cmd, check=True) assert old != mzml.stat() diff --git a/tests/unit_tests/test_msstats.py b/tests/unit_tests/test_msstats.py index 0a34bbd..2b18186 100644 --- a/tests/unit_tests/test_msstats.py +++ b/tests/unit_tests/test_msstats.py @@ -15,6 +15,9 @@ Path("QCPlot.pdf"), ] +""" +Note that the script fixture is defined at the bottom of this file. +""" def test_joins(msstats_input, script): """Test that the joins are made correctly""" @@ -217,7 +220,8 @@ def script(monkeypatch, tmp_path): """Set the working directory""" (tmp_path / "msstats").mkdir(exist_ok=True) (tmp_path / "results").mkdir(exist_ok=True) - script_path = Path("bin/msstats.R").resolve() + script_location = Path(__file__).parent / "../../bin" + script_path = (script_location / "msstats.R").resolve() monkeypatch.syspath_prepend(script_path) monkeypatch.chdir(tmp_path) return script_path