From 06eb8eacf0977961f5bc145a597538f097896590 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 26 Jun 2023 09:59:37 -0700 Subject: [PATCH 01/41] initial commit on the branch --- .github/workflows/release.yml | 2 +- main.nf | 2 +- modules/convert.nf | 61 +++++++++++++++++++++++ modules/msconvert.nf | 29 ----------- subworkflows/{msconvert.nf => convert.nf} | 15 +++++- tests/conftest.py | 15 +++++- 6 files changed, 90 insertions(+), 34 deletions(-) create mode 100644 modules/convert.nf delete mode 100644 modules/msconvert.nf rename subworkflows/{msconvert.nf => convert.nf} (68%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4a7121d..d24dc05 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Create latest tag uses: Actions-R-Us/actions-tagger@latest diff --git a/main.nf b/main.nf index 3b2be0a..4a87261 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl = 2 // Subworkflows -include { CONVERT_TO_MZML } from "./subworkflows/msconvert" +include { CONVERT_TO_MZML } from "./subworkflows/convert" include { BUILD_CHROMATOGRAM_LIBRARY; PERFORM_QUANT; diff --git a/modules/convert.nf b/modules/convert.nf new file mode 100644 index 0000000..3e41acd --- /dev/null +++ b/modules/convert.nf @@ -0,0 +1,61 @@ +process MSCONVERT { + publishDir "${params.mzml_dir}/${outputDir}", failOnError: true + label 'process_low_constant' + label 'error_retry' + + input: + tuple val(file_id), path(raw_input), val(outputDir) + + output: + tuple val(file_id), path("${raw_input.baseName}.mzML.gz") + + script: + """ + wine msconvert \\ + -v \\ + --gzip \\ + --mzML \\ + --64 \\ + --zlib \\ + --filter "peakPicking true 1-" \\ + ${params.msconvert.demultiplex ? 
'--filter "demultiplex optimization=overlap_only"' : ''} \\ + ${raw_input} + """ + + stub: + """ + touch ${raw_input.baseName}.mzML.gz + """ +} + + +process TDF2MZML { + publishDir "${params.mzml_dir}/${outputDir}", pattern: "*.mzML.gz", failOnError: true + container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use... + label 'process_single' + label 'error_retry' + + input: + tuple val(file_id), path(tdf_input), val(outputDir) + + output: + tuple val(file_id), path("${file(tdf_input.baseName).baseName}.mzML.gz") + + script: + """ + echo "Unpacking..." + tar -xvf ${tdf_input} + + echo "Converting..." + tdf2mzml.py -i *.d # --ms1_type "centroid" + + echo "Compressing..." + mv *.mzml ${file(tdf_input.baseName).baseName}.mzML + gzip ${file(tdf_input.baseName).baseName}.mzML + """ + + stub: + """ + touch ${file(tdf_input.baseName).baseName}.mzML.gz + """ +} \ No newline at end of file diff --git a/modules/msconvert.nf b/modules/msconvert.nf deleted file mode 100644 index b3430c3..0000000 --- a/modules/msconvert.nf +++ /dev/null @@ -1,29 +0,0 @@ -process MSCONVERT { - publishDir "${params.mzml_dir}/${outputDir}", failOnError: true - label 'process_low_constant' - label 'error_retry' - - input: - tuple val(file_id), path(raw_input), val(outputDir) - - output: - tuple val(file_id), path("${raw_input.baseName}.mzML.gz") - - script: - """ - wine msconvert \\ - -v \\ - --gzip \\ - --mzML \\ - --64 \\ - --zlib \\ - --filter "peakPicking true 1-" \\ - ${params.msconvert.demultiplex ? 
'--filter "demultiplex optimization=overlap_only"' : ''} \\ - ${raw_input} - """ - - stub: - """ - touch ${raw_input.baseName}.mzML.gz - """ -} diff --git a/subworkflows/msconvert.nf b/subworkflows/convert.nf similarity index 68% rename from subworkflows/msconvert.nf rename to subworkflows/convert.nf index 1d889d7..8cd1e30 100644 --- a/subworkflows/msconvert.nf +++ b/subworkflows/convert.nf @@ -1,6 +1,7 @@ -include { MSCONVERT } from "../modules/msconvert.nf" +include { MSCONVERT; TDF2MZML } from "../modules/convert" + workflow CONVERT_TO_MZML { take: raw_files @@ -21,7 +22,17 @@ workflow CONVERT_TO_MZML { } | set { staging } - MSCONVERT(staging.mzml_absent) + staging.mzml_absent + | branch { + is_tdf: it[0].toLowerCase().endsWith(".d.tar") + return it + is_raw: true + return it + } + |set { to_convert } + + MSCONVERT(to_convert.is_raw) + | concat(TDF2MZML(to_convert.is_tdf)) | concat(staging.is_mzml) | concat(staging.mzml_present) | set { results } diff --git a/tests/conftest.py b/tests/conftest.py index 06ff8c6..03cd4eb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,18 +14,31 @@ def base_project(tmp_path): raw_dir = tmp_path / "subdir" raw_dir.mkdir() raw_files = [raw_dir / f"{f}.raw" for f in "abcdefghijklm"] + dot_d_files = [raw_dir / f"{f}.d.tar" for f in "opqrs"] for raw_file in raw_files: raw_file.touch() + for raw_file in dot_d_files: + raw_file.touch() mzml_file = raw_dir / "n.mzML.gz" mzml_file.touch() raw_files.append(mzml_file) + raw_files.extend(dot_d_files) chrlibs = ["true"] * 6 + ["false"] * 8 - groups = "xyz" * 4 + "z" * 2 + chrlibs.extend(["false"] * len(dot_d_files)) + groups = "xyz" * 4 + "z" * 2 + "" * 8 + "w" * len(dot_d_files) # create an input csv ms_files = ["file,chrlib,group"] + + # This assertion makes sure we are defining the + # csv correctly and does not get prematurely terminated + # since zip does not check that all arguments are the same + # length. 
+ assert len(raw_files) == len(chrlibs) + assert len(raw_files) == len(groups) + for row in zip(raw_files, chrlibs, groups): row = list(row) row[0] = str(row[0]) From c4c15918d6242217d9498ec768d3162e6c17098c Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 27 Jun 2023 11:19:42 -0700 Subject: [PATCH 02/41] added params to the encyclopedia run --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 0acaf12..c952eda 100644 --- a/nextflow.config +++ b/nextflow.config @@ -88,7 +88,7 @@ params { * The default attempts to match the defaults from the graphical * user interface. */ - encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window' + encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window -minLength 7 -maxLength 40 -minIntensity 50 -filterPeaklists true ' /** Additional command line arguments to use when searching files. */ encyclopedia.local.args = '' From 6a19f136521ac84096ae1dddae85b9fe7a55fd49 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 21 Jul 2023 11:54:55 -0700 Subject: [PATCH 03/41] (wip) addition of skyline --- main.nf | 49 +++++++++++++- modules/encyclopedia.nf | 6 +- modules/ims.nf | 23 +++++++ modules/skyline.nf | 113 +++++++++++++++++++++++++++++++ subworkflows/encyclopedia.nf | 16 ++++- tests/conftest.py | 3 + tests/system_tests/test_stubs.py | 10 +++ 7 files changed, 216 insertions(+), 4 deletions(-) create mode 100644 modules/ims.nf create mode 100644 modules/skyline.nf diff --git a/main.nf b/main.nf index 4a87261..60b52b7 100644 --- a/main.nf +++ b/main.nf @@ -12,6 +12,12 @@ include { // Modules include { MSSTATS } from "./modules/msstats" +include { ADD_IMS_INFO } from "./modules/ims" +include { + SKYLINE_ADD_LIB; + SKYLINE_IMPORT_DATA; + SKYLINE_MERGE_RESULTS +} from "./modules/skyline" // @@ -74,6 +80,16 @@ workflow { error "No MS data files were provided. 
Nothing to do." } + // Raw Mass Spec files (raw including .raw or .d/.tar) + // These files will be used later to quant using skyline. + // This also filter out files that are chromatogram libraries + ms_files.runs + | join(ms_files.meta) + | filter { !it[1] } + | map { it[0] } + | filter( ~/^.*((.raw)|(.d.tar))$/ ) + | set { raw_quant_files } + // Convert raw files to gzipped mzML and group them by experiment. // The chrlib and quant channels take the following form: // [[file_ids], [mzml_gz_files], is_chrlib, group] @@ -111,9 +127,38 @@ workflow { if ( params.aggregate ) { // Aggregate quantitative runs with EncyclopeDIA. // The output has one channel: - // global -> [agg_name, peptides, proteins] or null + // global -> [agg_name, peptides_txt, proteins_txt] or null + // lib -> blib PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta) | set { enc_results } + + ADD_IMS_INFO(enc_results.blib) + | set { blib } + + skyline_template = file(params.skyline_template, checkIfExists: true) + SKYLINE_ADD_LIB(skyline_template, blib, fasta) + | set { skyline_template_zipfile } + + SKYLINE_IMPORT_DATA( + skyline_template_zipfile.skyline_zipfile, + raw_quant_files, + ) + | set { skyline_import_results } + + raw_quant_files = raw_quant_files.collect() + + skyline_import_results.skyd_file.view() + skyd_files = skyline_import_results.skyd_file.collect() + + skyd_files.view() + + SKYLINE_MERGE_RESULTS( + skyline_template_zipfile.skyline_zipfile, + skyd_files, + raw_quant_files, + ) + + } else { quant_results | set{ enc_results } } @@ -122,6 +167,8 @@ workflow { if ( params.msstats.enabled ) { MSSTATS(enc_results.global, input, contrasts) } + + // } diff --git a/modules/encyclopedia.nf b/modules/encyclopedia.nf index 01bd427..e25e012 100644 --- a/modules/encyclopedia.nf +++ b/modules/encyclopedia.nf @@ -23,7 +23,7 @@ def stem(suffix) { process ENCYCLOPEDIA_SEARCH { publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true publishDir 
"${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true - label 'process_medium' + label 'process_high' input: tuple val(group), path(mzml_gz_file) @@ -70,6 +70,7 @@ process ENCYCLOPEDIA_SEARCH { process ENCYCLOPEDIA_AGGREGATE { publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true + publishDir "${params.result_dir}/${group}/blib", pattern: '*.blib', failOnError: true publishDir "${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true publishDir "${params.result_dir}/${group}/results", pattern: '*.txt', failOnError: true publishDir "${params.result_dir}/${group}/reports", pattern: '*.csv', failOnError: true @@ -92,6 +93,7 @@ process ENCYCLOPEDIA_AGGREGATE { tuple( val(group), path("${stem(output_suffix)}.elib"), + path("${stem(output_suffix)}.blib"), path("${stem(output_suffix)}.global.log"), path("${output_suffix}_detection_summary.csv"), emit: "lib" @@ -117,6 +119,7 @@ process ENCYCLOPEDIA_AGGREGATE { -i ./ \\ -f ${fasta_file} \\ -l ${library_file} \\ + -blib true \\ ${params.encyclopedia.args} \\ ${params.encyclopedia.global.args} \\ -a ${align} \\ @@ -140,6 +143,7 @@ process ENCYCLOPEDIA_AGGREGATE { stub: """ touch ${stem(output_suffix)}.elib + touch ${stem(output_suffix)}.blib if [ "${align}" = true ]; then touch ${stem(output_suffix)}.peptides.txt diff --git a/modules/ims.nf b/modules/ims.nf new file mode 100644 index 0000000..d315df2 --- /dev/null +++ b/modules/ims.nf @@ -0,0 +1,23 @@ + +process ADD_IMS_INFO { + publishDir "${params.result_dir}/${group}/blib", pattern: '*.ims.blib', failOnError: true + label 'process_medium' + container 'ghcr.io/talusbio/flimsay:v0.2.0' + + input: + path blib + + output: + path("*.ims.blib"), emit: blib + + script: + """ + flimsay fill_blib ${blib} blib.ims.blib + """ + + stub: + """ + echo "${blib}" + touch blib.ims.blib + """ +} diff --git a/modules/skyline.nf b/modules/skyline.nf new file mode 100644 index 0000000..5eba01a --- /dev/null +++ 
b/modules/skyline.nf @@ -0,0 +1,113 @@ +process SKYLINE_ADD_LIB { + publishDir "${params.result_dir}/skyline/add-lib", failOnError: true, mode: 'copy' + label 'process_medium' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + input: + path skyline_template_zipfile + path fasta + path blib + + output: + path("results.sky.zip"), emit: skyline_zipfile + path("skyline_add_library.log"), emit: log + + script: + """ + unzip ${skyline_template_zipfile} + + wine SkylineCmd \ + --in="${skyline_template_zipfile.baseName}" \ + --log-file=skyline_add_library.log \ + --import-fasta="${fasta}" \ + --add-library-path="${blib}" \ + --out="results.sky" \ + --save \ + --share-zip="results.sky.zip" \ + --share-type="complete" + """ + + stub: + """ + echo "${skyline_template_zipfile}" + echo "${fasta}" + echo "${blib}" + touch skyline_add_library.log + touch results.sky.zip + """ +} + +process SKYLINE_IMPORT_DATA { + publishDir "${params.result_dir}/skyline/import-spectra", failOnError: true, mode: 'copy' + label 'process_medium' + label 'process_high_memory' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + input: + path skyline_zipfile + path raw_file + + output: + path("*.skyd"), emit: skyd_file + path("${raw_file.baseName}.log"), emit: log_file + + script: + """ + unzip ${skyline_zipfile} + + wine SkylineCmd \ + --in="${skyline_zipfile.baseName}" \ + --import-no-join \ + --log-file="${raw_file.baseName}.log" \ + --import-file="${raw_file}" \ + """ + + stub: + """ + echo "${skyline_zipfile}" + echo "${raw_file}" + touch ${raw_file.baseName}.skyd + touch ${raw_file.baseName}.log + """ +} + +process SKYLINE_MERGE_RESULTS { + publishDir "${params.result_dir}/skyline/import-spectra", failOnError: true, mode: 'copy' + label 'process_high' + label 'error_retry' + container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' + + 
input: + path skyline_zipfile + path '*.skyd' + val raw_files + + output: + path("final.sky.zip"), emit: final_skyline_zipfile + path("skyline-merge.log"), emit: log + + script: + // import_files_params = "--import-file=${(mzml_files as List).collect{ "/tmp/" + file(it).name }.join(' --import-file=')}" + """ + unzip ${skyline_zipfile} + + wine SkylineCmd \ + --in="${skyline_zipfile.baseName}" \ + --log-file="skyline-merge.log" \ + ${import_files_params} \ + --out="final.sky" \ + --save \ + --share-zip="final.sky.zip" \ + --share-type="complete" + """ + + stub: + """ + echo "${skyline_zipfile}" + echo "${*.skyd}" + touch skyline-merge.log + touch final.sky.zip + """ +} diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index fda0e44..8ac919f 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -97,7 +97,10 @@ workflow PERFORM_QUANT { fasta, params.encyclopedia.quant_suffix, true // Align RTs - ).quant + ) + | set { agg_outs } + + agg_outs.quant | set { global_files } } @@ -136,9 +139,18 @@ workflow PERFORM_AGGREGATE_QUANT { fasta, params.encyclopedia.quant_suffix, true // Align RTs - ).quant + ) + | set { agg_results } + + agg_results.quant | set { global_files } + // Lib is ['aggregated', .elib, .blib, .log, summary.csv] + agg_results.lib + | map {it[1]} + | set { blib } + emit: global = global_files + blib = blib } diff --git a/tests/conftest.py b/tests/conftest.py index 03cd4eb..8718087 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,6 +47,9 @@ def base_project(tmp_path): ms_files_csv = tmp_path / "ms_files.csv" with ms_files_csv.open("w+") as fhndl: fhndl.write("\n".join(ms_files) + "\n") + + # print the file + print(ms_files_csv.read_text()) ms_files_csv_short = tmp_path / "ms_files_short.csv" with ms_files_csv_short.open("w+") as fhndl: diff --git a/tests/system_tests/test_stubs.py b/tests/system_tests/test_stubs.py index ff318ab..f08e9f1 100644 --- a/tests/system_tests/test_stubs.py +++ 
b/tests/system_tests/test_stubs.py @@ -9,6 +9,7 @@ def test_no_groups(base_project, tmp_path): config, input_csv, _ = base_project pd.read_csv(input_csv).drop(columns="group").to_csv(input_csv, index=False) cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" @@ -19,6 +20,7 @@ def test_no_aggregate(base_project, tmp_path): """Test the workflow logic for per experiment workflows""" config, *_ = base_project cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" expected = [ @@ -45,16 +47,22 @@ def test_aggregate(base_project, tmp_path): """Test workflow logic for global analyses.""" config, *_ = base_project + template_file = tmp_path / "template.sky.zip" + template_file.touch() + cmd = [ "nextflow", "run", "main.nf", "--aggregate", "true", + "--skyline_template", + str(template_file), ] cmd += config + print(cmd) subprocess.run(cmd, check=True) base = tmp_path / "results" not_expected = [ @@ -80,6 +88,7 @@ def test_already_converted(base_project, tmp_path): config, *_ = base_project cmd = ["nextflow", "run", "main.nf"] + config + print(cmd) subprocess.run(cmd, check=True) assert old == mzml.stat() @@ -96,6 +105,7 @@ def test_force_convert(base_project, tmp_path): config, *_ = base_project cmd = ["nextflow", "run", "main.nf", "--msconvert.force", "true"] + config + print(cmd) subprocess.run(cmd, check=True) assert old != mzml.stat() From 96a8b3e06b2b35b64fcb725adbb13be025a1cf84 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 21 Jul 2023 13:51:44 -0700 Subject: [PATCH 04/41] added branching to the skyline quant step --- main.nf | 12 ++++++++---- modules/ims.nf | 3 ++- modules/skyline.nf | 6 ++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 60b52b7..6607e0e 100644 --- a/main.nf +++ b/main.nf @@ -131,14 +131,20 @@ workflow { // lib -> blib PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta) | set { enc_results } - - ADD_IMS_INFO(enc_results.blib) + + quant_results.local + | map { it[0] } + | set { groups } + + ADD_IMS_INFO(groups, enc_results.blib) | set { blib } skyline_template = file(params.skyline_template, checkIfExists: true) SKYLINE_ADD_LIB(skyline_template, blib, fasta) | set { skyline_template_zipfile } + println raw_quant_files + raw_quant_files.view() SKYLINE_IMPORT_DATA( skyline_template_zipfile.skyline_zipfile, raw_quant_files, @@ -147,9 +153,7 @@ workflow { raw_quant_files = raw_quant_files.collect() - skyline_import_results.skyd_file.view() skyd_files = skyline_import_results.skyd_file.collect() - skyd_files.view() SKYLINE_MERGE_RESULTS( diff --git a/modules/ims.nf b/modules/ims.nf index d315df2..a537c9a 100644 --- a/modules/ims.nf +++ b/modules/ims.nf @@ -5,6 +5,7 @@ process ADD_IMS_INFO { container 'ghcr.io/talusbio/flimsay:v0.2.0' input: + val group path blib output: @@ -12,7 +13,7 @@ process ADD_IMS_INFO { script: """ - flimsay fill_blib ${blib} blib.ims.blib + flimsay fill_blib ${blib} blib.ims.blib """ stub: diff --git a/modules/skyline.nf b/modules/skyline.nf index 5eba01a..ff51441 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -47,7 +47,7 @@ process SKYLINE_IMPORT_DATA { input: path skyline_zipfile - path raw_file + each path(raw_file) output: path("*.skyd"), emit: skyd_file @@ -89,7 +89,7 @@ process SKYLINE_MERGE_RESULTS { path("skyline-merge.log"), emit: log script: - // import_files_params = "--import-file=${(mzml_files as List).collect{ "/tmp/" + file(it).name }.join(' 
--import-file=')}" + import_files_params = "--import-file=${(mzml_files as List).collect{ "/tmp/" + file(it).name }.join(' --import-file=')}" """ unzip ${skyline_zipfile} @@ -105,8 +105,6 @@ process SKYLINE_MERGE_RESULTS { stub: """ - echo "${skyline_zipfile}" - echo "${*.skyd}" touch skyline-merge.log touch final.sky.zip """ From 0a76604f863dec6502345ce375326ed0e178d46c Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Fri, 21 Jul 2023 14:34:49 -0700 Subject: [PATCH 05/41] (wip) improvement on skyline job passing --- main.nf | 7 +++---- modules/skyline.nf | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 6607e0e..b901c9d 100644 --- a/main.nf +++ b/main.nf @@ -143,8 +143,7 @@ workflow { SKYLINE_ADD_LIB(skyline_template, blib, fasta) | set { skyline_template_zipfile } - println raw_quant_files - raw_quant_files.view() + // This will generate a skyd for every raw data file SKYLINE_IMPORT_DATA( skyline_template_zipfile.skyline_zipfile, raw_quant_files, @@ -152,16 +151,16 @@ workflow { | set { skyline_import_results } raw_quant_files = raw_quant_files.collect() - skyd_files = skyline_import_results.skyd_file.collect() - skyd_files.view() SKYLINE_MERGE_RESULTS( skyline_template_zipfile.skyline_zipfile, skyd_files, raw_quant_files, ) + | set { skyline_merge_results } + skyline_merge_results.final_skyline_zipfile.view() } else { quant_results | set{ enc_results } diff --git a/modules/skyline.nf b/modules/skyline.nf index ff51441..9747e2b 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -1,5 +1,5 @@ process SKYLINE_ADD_LIB { - publishDir "${params.result_dir}/skyline/add-lib", failOnError: true, mode: 'copy' + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' label 'process_medium' label 'error_retry' container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' @@ -39,7 +39,7 @@ process SKYLINE_ADD_LIB { } process SKYLINE_IMPORT_DATA { - publishDir 
"${params.result_dir}/skyline/import-spectra", failOnError: true, mode: 'copy' + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' label 'process_medium' label 'process_high_memory' label 'error_retry' @@ -74,7 +74,7 @@ process SKYLINE_IMPORT_DATA { } process SKYLINE_MERGE_RESULTS { - publishDir "${params.result_dir}/skyline/import-spectra", failOnError: true, mode: 'copy' + publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' label 'process_high' label 'error_retry' container 'quay.io/protio/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.23187-2243781' From 84dc60a333099cd0149a2b2e4ba28f730119428c Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 11:34:06 -0700 Subject: [PATCH 06/41] made blib conversion 2 step --- modules/encyclopedia.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/encyclopedia.nf b/modules/encyclopedia.nf index e25e012..b2e3dc8 100644 --- a/modules/encyclopedia.nf +++ b/modules/encyclopedia.nf @@ -119,12 +119,14 @@ process ENCYCLOPEDIA_AGGREGATE { -i ./ \\ -f ${fasta_file} \\ -l ${library_file} \\ - -blib true \\ ${params.encyclopedia.args} \\ ${params.encyclopedia.global.args} \\ -a ${align} \\ | tee ${stem(output_suffix)}.global.log + ${execEncyclopedia(task.memory)} \\ + -convert -libtoblib -i ${stem(output_suffix)}.elib + # Better file names: if [ "${align}" = true ]; then mv ${stem(output_suffix)}.elib.peptides.txt ${stem(output_suffix)}.peptides.txt From fa5bf4b7ae6f997f62a24c313abe37d1a01aca65 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 24 Jul 2023 11:47:39 -0700 Subject: [PATCH 07/41] Update test.yml --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 307ea52..bd9b511 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,9 +39,9 @@ jobs: - name: Run Unit Tests run: | - REMOTE=/app/nf-encyclopedia - docker run -v $(pwd):${REMOTE} -w ${REMOTE} nf-encyclopedia:latest \ - pytest tests/unit_tests + # REMOTE=/app/nf-encyclopedia + docker run -v $(pwd):${REMOTE} nf-encyclopedia:latest \ + pytest tests/unit_tests # -w ${REMOTE} - name: Run System Tests run: | From d9706cee5f70cbb2e6bb6392d8ad1045c8c88ab3 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 11:54:35 -0700 Subject: [PATCH 08/41] minor addition to tests cicd --- .github/workflows/test.yml | 2 +- subworkflows/encyclopedia.nf | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bd9b511..3bd8cde 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,7 +39,7 @@ jobs: - name: Run Unit Tests run: | - # REMOTE=/app/nf-encyclopedia + REMOTE=/app/nf-encyclopedia docker run -v $(pwd):${REMOTE} nf-encyclopedia:latest \ pytest tests/unit_tests # -w ${REMOTE} diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index 8ac919f..d3db851 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -41,7 +41,7 @@ workflow BUILD_CHROMATOGRAM_LIBRARY { params.encyclopedia.chrlib_suffix, false // Don't align RTs ).lib - | map { it -> tuple it[0], it[1] } + | map { eait -> tuple eait[0], eait[1] } | set { output_elib } emit: @@ -61,9 +61,9 @@ workflow PERFORM_QUANT { // output is [group, mzml_gz_file, elib] quant_files | transpose() - | multiMap { it -> - mzml: tuple it[0], it[1] - elib: it[2] + | multiMap { qit 
-> + mzml: tuple qit[0], qit[1] + elib: qit[2] } | set { ungrouped_files } @@ -86,12 +86,12 @@ workflow PERFORM_QUANT { // Only run group-wise global if needed. if ( local_only ) { - Channel.empty() | set { global_files } - Channel.empty() | set { msstats_files } + Channel.empty() | set { global_files } + Channel.empty() | set { msstats_files } } else { - // Do the global analysis - // Output is [group, peptides_txt, proteins_txt] - ENCYCLOPEDIA_AGGREGATE( + // Do the global analysis + // Output is [group, peptides_txt, proteins_txt] + ENCYCLOPEDIA_AGGREGATE( local_files, dlib, fasta, @@ -100,7 +100,7 @@ workflow PERFORM_QUANT { ) | set { agg_outs } - agg_outs.quant + agg_outs.quant | set { global_files } } @@ -139,15 +139,15 @@ workflow PERFORM_AGGREGATE_QUANT { fasta, params.encyclopedia.quant_suffix, true // Align RTs - ) + ) | set { agg_results } - + agg_results.quant | set { global_files } // Lib is ['aggregated', .elib, .blib, .log, summary.csv] agg_results.lib - | map {it[1]} + | map { libe -> libe[1] } | set { blib } emit: From df8e1ebc82988317d11745cc09af775617ea6483 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 15:47:00 -0700 Subject: [PATCH 09/41] updated docker image --- Dockerfile | 69 ++++++++++++++++++++++++++++--------------------- environment.yml | 12 ++++----- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/Dockerfile b/Dockerfile index 61ff417..ad7d099 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,66 +1,77 @@ FROM --platform=linux/amd64 mambaorg/micromamba:latest as micromamba -# First Stage of the build, gets the jar for encyclopedia -FROM --platform=linux/amd64 openjdk:8-jre +# FROM --platform=linux/amd64 ibmjava:11 +# FROM --platform=linux/amd64 amazoncorretto:11-al2023-headless # Uses yum for package management. 
+FROM --platform=linux/amd64 nextflow/nextflow:23.04.2 + ARG VERSION=2.12.30 ENV VERSION ${VERSION} +LABEL authors="wfondrie@talus.bio" \ + description="Docker image for most of nf-encyclopedia" + + +# Install procps so that Nextflow can poll CPU usage and +# deep clean the apt cache to reduce image/layer size +# RUN apt-get install -y procps sqlite3 libgomp1 \ +# && apt-get clean -y && rm -rf /var/lib/apt/lists/* +RUN yum install -y wget -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install libgomp1 && \ - apt-get clean WORKDIR /code +# First Stage of the build, gets the jar for encyclopedia RUN wget https://bitbucket.org/searleb/encyclopedia/downloads/encyclopedia-${VERSION}-executable.jar -WORKDIR /app -LABEL authors="wfondrie@talus.bio" \ - description="Docker image for most of nf-encyclopedia" +# # Install nextflow +# RUN wget -qO- https://get.nextflow.io | bash +# RUN chmod +x nextflow +# RUN mv nextflow /usr/local/bin/. -# Install procps so that Nextflow can poll CPU usage and -# deep clean the apt cache to reduce image/layer size -RUN apt-get update \ - && apt-get install -y procps sqlite3 \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* +WORKDIR /app # Setup micromamba: -ARG MAMBA_USER=mamba +ARG MAMBA_USER=root ARG MAMBA_USER_ID=1000 ARG MAMBA_USER_GID=1000 +ARG MAMBA_DOCKERFILE_ACTIVATE=1 ENV MAMBA_USER=$MAMBA_USER ENV MAMBA_ROOT_PREFIX="/opt/conda" ENV MAMBA_EXE="/bin/micromamba" +RUN mkdir -p ${MAMBA_ROOT_PREFIX} COPY --from=micromamba "$MAMBA_EXE" "$MAMBA_EXE" COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh COPY --from=micromamba /usr/local/bin/_dockerfile_shell.sh /usr/local/bin/_dockerfile_shell.sh -COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/_entrypoint.sh +COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/mamba_entrypoint.sh COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh 
COPY --from=micromamba /usr/local/bin/_dockerfile_initialize_user_accounts.sh /usr/local/bin/_dockerfile_initialize_user_accounts.sh COPY --from=micromamba /usr/local/bin/_dockerfile_setup_root_prefix.sh /usr/local/bin/_dockerfile_setup_root_prefix.sh -RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && \ - /usr/local/bin/_dockerfile_setup_root_prefix.sh +# No need to set up accounts if we will run as root ... +# RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && + +RUN /usr/local/bin/_dockerfile_setup_root_prefix.sh # Setup the environment -USER root +USER $MAMBA_USER COPY environment.yml /tmp/environment.yml # Instruct R processes to use these empty files instead of # clashing with a local one RUN touch .Rprofile .Renviron -# Create the environment -RUN micromamba install -y -n base -f /tmp/environment.yml && \ - micromamba clean --all --yes +# Set the path. NextFlow seems to circumvent the conda environment +# We also need to set options for the JRE here. +ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH:/bin" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION # Setup the EncyclopeDIA executable: RUN ln -s /code/encyclopedia-$VERSION-executable.jar /code/encyclopedia.jar -# Set the path. NextFlow seems to circumvent the conda environment -# We also need to set options for the JRE here. 
-ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION - # Create the entrypoint: -SHELL ["/usr/local/bin/_dockerfile_shell.sh"] -ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] -CMD [] +# SHELL ["/usr/local/bin/_dockerfile_shell.sh"] +# ENTRYPOINT ["/usr/local/bin/mamba_entrypoint.sh", "/usr/local/bin/entry.sh"] + +# Create the environment +RUN micromamba install -y -n base -f /tmp/environment.yml && \ + micromamba clean --all --yes + +CMD ["/bin/bash"] + diff --git a/environment.yml b/environment.yml index e8c1b41..c787b45 100644 --- a/environment.yml +++ b/environment.yml @@ -8,9 +8,9 @@ dependencies: - pytest - numpy - pandas - - bioconductor-msstats=4.2.0 - - r-rlang - - r-dplyr - - r-tidyr - - r-magrittr - - r-stringr + - bioconda::bioconductor-msstats + - conda-forge::r-rlang + - conda-forge::r-dplyr + - conda-forge::r-tidyr + - r::r-magrittr + - r::r-stringr From 2df5d7a0cc2b82ea114e10a4f96654321fd6003b Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 15:49:59 -0700 Subject: [PATCH 10/41] moved skyline logic out of agg --- main.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index b901c9d..8de8096 100644 --- a/main.nf +++ b/main.nf @@ -123,15 +123,6 @@ workflow { PERFORM_QUANT(quant_files, dlib, fasta, params.aggregate) | set { quant_results } - // Perform an aggregate analysis on all files if needed: - if ( params.aggregate ) { - // Aggregate quantitative runs with EncyclopeDIA. 
- // The output has one channel: - // global -> [agg_name, peptides_txt, proteins_txt] or null - // lib -> blib - PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta) - | set { enc_results } - quant_results.local | map { it[0] } | set { groups } @@ -162,6 +153,15 @@ workflow { skyline_merge_results.final_skyline_zipfile.view() + // Perform an aggregate analysis on all files if needed: + if ( params.aggregate ) { + // Aggregate quantitative runs with EncyclopeDIA. + // The output has one channel: + // global -> [agg_name, peptides_txt, proteins_txt] or null + // lib -> blib + PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta) + | set { enc_results } + } else { quant_results | set{ enc_results } } From f0559fa08157a8981a7798756e667ba2d9d04dd5 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 16:18:55 -0700 Subject: [PATCH 11/41] added skyline template asset --- .gitignore | 1 + main.nf | 47 ++++++++++++++++++------------------ subworkflows/encyclopedia.nf | 6 ++++- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index fc0d425..e8f2d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .nextflow +venv .Rhistory .idea/** .gradle diff --git a/main.nf b/main.nf index 8de8096..93947e6 100644 --- a/main.nf +++ b/main.nf @@ -55,6 +55,7 @@ workflow { input = file(params.input, checkIfExists: true) fasta = file(params.fasta, checkIfExists: true) dlib = file(params.dlib, checkIfExists: true) + skyline_empty_template = file("$baseDir/assets/template.sky.zip", checkIfExists: true) // Optional contrasts arg: if ( params.contrasts != null ) { @@ -123,35 +124,35 @@ workflow { PERFORM_QUANT(quant_files, dlib, fasta, params.aggregate) | set { quant_results } - quant_results.local - | map { it[0] } - | set { groups } + quant_results.local + | map { it[0] } + | set { groups } - ADD_IMS_INFO(groups, enc_results.blib) - | set { blib } + // Add IMS info to the blib + ADD_IMS_INFO(groups, quant_results.blib) 
+ | set { blib } - skyline_template = file(params.skyline_template, checkIfExists: true) - SKYLINE_ADD_LIB(skyline_template, blib, fasta) - | set { skyline_template_zipfile } + SKYLINE_ADD_LIB(skyline_empty_template, blib, fasta) + | set { skyline_template_zipfile } - // This will generate a skyd for every raw data file - SKYLINE_IMPORT_DATA( - skyline_template_zipfile.skyline_zipfile, - raw_quant_files, - ) - | set { skyline_import_results } + // This will generate a skyd for every raw data file + SKYLINE_IMPORT_DATA( + skyline_template_zipfile.skyline_zipfile, + raw_quant_files, + ) + | set { skyline_import_results } - raw_quant_files = raw_quant_files.collect() - skyd_files = skyline_import_results.skyd_file.collect() + raw_quant_files = raw_quant_files.collect() + skyd_files = skyline_import_results.skyd_file.collect() - SKYLINE_MERGE_RESULTS( - skyline_template_zipfile.skyline_zipfile, - skyd_files, - raw_quant_files, - ) - | set { skyline_merge_results } + SKYLINE_MERGE_RESULTS( + skyline_template_zipfile.skyline_zipfile, + skyd_files, + raw_quant_files, + ) + | set { skyline_merge_results } - skyline_merge_results.final_skyline_zipfile.view() + skyline_merge_results.final_skyline_zipfile.view() // Perform an aggregate analysis on all files if needed: if ( params.aggregate ) { diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index d3db851..103ef16 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -99,7 +99,10 @@ workflow PERFORM_QUANT { true // Align RTs ) | set { agg_outs } - + + agg_outs.lib | + map { libe -> libe[1] } | + set { global_blib } agg_outs.quant | set { global_files } } @@ -107,6 +110,7 @@ workflow PERFORM_QUANT { emit: local = local_files global = global_files + blib = global_blib } From f5b0d7fb8a0b3815f069dd2fdd42c47fd825543e Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 24 Jul 2023 16:21:41 -0700 Subject: [PATCH 12/41] handled no-merge use case --- subworkflows/encyclopedia.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index 103ef16..9f7f15f 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -88,6 +88,7 @@ workflow PERFORM_QUANT { if ( local_only ) { Channel.empty() | set { global_files } Channel.empty() | set { msstats_files } + Channel.empty() | set { global_blib } } else { // Do the global analysis // Output is [group, peptides_txt, proteins_txt] From a1768bae0e9c0c0d2a1a3b1bccf72b8cf4419d77 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 16:26:46 -0700 Subject: [PATCH 13/41] added asset --- assets/template.sky.zip | Bin 0 -> 9127 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 assets/template.sky.zip diff --git a/assets/template.sky.zip b/assets/template.sky.zip new file mode 100644 index 0000000000000000000000000000000000000000..60e729ace70f78d8c4486d88dd253b5f20a59696 GIT binary patch literal 9127 zcma)?Wo%v9(x&a0nVC6`8DnOMnVB($V~UxXnVH#+WA^beGc!ZX%*-_D{-&kAcQl&W zTdI23($c%6{o~PARVm6qKw^S{fx&`(vT;@$k1+L-g8~El%?k#G2F3;EWM*RzFmf_u zbhLJ7bg?vZO;1PtbND|WcrenxH^9Mg!SFaYr>@?X!2iiXd4`F9t{dBZJ0>CS zN#?w5(NFqa5KS4fiiY7Eq~hEN7MOxHT5XsY?=eeHpg-qN`~Nma;W z2X`D@y|_mBaR%8+XVTzj-ES$k%=QHz0HpOI-IzE z;U}tV58WAv?_^t4K1`RZqR;q8w2xUVFw4>{cg=ztajLh9r0yYYsAcfWzASn8i{4Nf z4|(m|9`%?u5DlL;kU1P+-0YMHxCPb;n%59>ttHM1QxZe*EuV>JEQME0ir929Zbx+s z5j>QrPo)+*B#DfK>mjL>O54S<8L?ocqKI@Qr)Gn=zX`bh1i|UdM;*26BB?RJc<{+J z^I9-1j1x)Q*@x1;LPhWcoH|RnSG>BDQxm=c4RVphpSWW`K_YveLvN5b&I8)xD*KIqdpf;InHAiu^ooZ zu4hs(%3cN%G$C)%k#UU~^|q^8rc8zvr?WMw*q#wRxM-pmh*&@H_RM;c^0h{(`%;yV zP^44Gi7Wb4fR}Xar7cX$2SzaScN1L$W+pY)2;Xw9vo5jGIhc{#qt$VnC>9$Kxlzh- zj%IClwNuCrDDDcU`)zkuQpj*fkqR0E*M^7$xPCA`lbEw` zJg=L0-tM|-gr@gY0SuH0`MLMq8-=PX^ujDVH1k}uqC7(XTE0m*-0n7OgwEGm_fL>V z<`6y{vsf?Hvn+p0M06*RBq2)4TzR 
zD?9332}|je(__+o2x$(^-Yzb)s zzC7r;ufLg<3mBAA1nWm~w@j&Y1cs+Ab*QQ;^y3|~ty35>Q-MwM-v%&Q1S%aIk(&27 z+VFw=NT24(4%9=kzQg^{ml|@{-i^p=F>`-Pw(Mrwe28S*|G2B60%=e*?a-?#J zT=q^KdMYMcWSkD!Nj`l3%|``PH}kO3Dp<^;YzRt|oqgfX zgr=R{08eEq^`X?#n9wUIH#jIO0-R||Z^c@z>fi6TePBH(r~qWJFy>xLJ-(~X3CdBJ zuzUb4ILz)Rf6#a0+oW$>9>WIc8ysuODtV`qbNAow36^y%;3+{=6?F*9KQSS$GE{s6 z*iU1$bCe$}7AzF4AJ{I|lTSCbF&w=t{%rHDm^p2s@BA_TabTskJ2FOg(LRU<1b34W z^BUVyP(r_S2f7{GCkgL27U1c}mY+kGSK##)j!WcKe0NPu;hZgNrHHl#N^IyCp8Kj7 z=13GGStwd4c*!kn(xi6ortsM8T)VW$FX@+NOBT%t4KgZMge2Z-7Y4@XFvqetS=VE>_JWMMJOU(}qHp`P5aD7klEhaNH`}=PXRt#cr*vuG_FEk(E(|fN zl?h3h`Pt)Hsfhis1j|-|kiz8mLJr_$90G~LV7_lmP6(S|L>UPn$2|@@Upx^qlTR+3 zF^-Z}$D?{VWVc6M7&WDK>^kktx?<2MWLPi0&SM!y&oo2^W%@T`yWqO#w=cQ^oxuU= z?5OqEQ$>2jH$ix-KGxF24kOV93rbr!FS=c{g#wGV zkPQt>ovC?8LM1b-3)$tvS(dQz6rHPzwk5QnuKig#q?D=8u3(||io6CdTQohshN73% z8&w2t%8#W{aNv7YT;klX3tT4|=ZMp*%1b(q>u5^9Y8)k7zvlJ#P2Dm}^} zhbf0a$xe9ry2bBHD823s>hOKtyIfKGlP|_x5tuIV0q%3688L;HsKg`3OE9iUY)ety z`tt)b_vK#@T@3A^)7h%FfA3jIlz#VL45zNm47uv8_U(9H8gLyHbn!S{K`88Bex?s( zf{}uc`?{ox9Ab~#=)m$KT*mE?Q-s94(}3m0NgI~w=!*vFbRz5O8TeMfd#(p+jL1Zb zgPah8tb=c#AF-)?CbXQ4$?RZwz+XHW&aZVT(YAB8v#*(9Xc9sdo06vVQ7xtoRt?8y zbzV1Ie`1{~S0=3;o2M&IoCs%_PWC5^WbTTi0F7mwllmLDH+kkrba<{3F*xHoCyMSM zQK6%Gq~3VU;njY!_I`UYqbr`lhE+5;P1E>!96d#;S~(6YKbT59}gq1HC!g3p<+_Dg^I z4Z;%S8M$eMQ&rbub&ea;>(#3<3pe_cjeRA&EwMLETjoX7!+vS2?y>>h)J5>q?#UK* zqCYh8tw!82WU)?DXL`qXXbYU2$1@%Zn;J|C`(*$nQ`16HyEk z90ndb)@(PUrZZy#HFugvrs$8tevEe2cGCvZz5z1woaGAB8)cj6Mnm}ue3*3FJP`6f zTq!(=t4i7M$0TtMjMy{8RhqX4m6$W^IC1hP1Msw9CcI2+w{lV`_w)LfP z685O+2l-6bxOb7U$62?22^jXy`_!LK-#5P?9%E@-@K$!9Te2KEfM_(8Tx~*m)yQ9L z^t)=5xQB|CTe=u?o@a;9BD;TIV!g~pKjNh=7L#m({X)27XhCM8=(0V>bE)(oxvzR8 zL!ML!7woNHmbxT*9m(B3vOFx`^nW`Hx)ge+9R@!S#ORWwdZinD>xbL&e^L2d?+D|Celcahd#unS7mPfXE0V&&gX$|{?)Pw{Q5fir!o%_E>2tZ0@M2~?^JaO{ z|M8#HKjD*}FvSJ~E4~5)L;5f31O8vw2gg|d8~gv{U?%j`_^i_E~bUZl!hie4}#}p^0jZ#r^pprQ}}zF>u7SQKu?*?ehBZ{JSOm#V;PUxp`~J z7SLy^X z2gCI%^5EH}ZY!$dD+{YKMWALszL#mTgZd{Z$mB$_0-Dn4d4aFv2M7 
zWEQmoTZ$n!2PHL!q!%xywmG`Jv>W-`$bY`xDj?mSoqF3IHwOB2sTl_kpJe5ZMNsm!bB{~@g= zZmZBoJRGzj7Ci&i*|5^UVoy-2-k*x=L#rlA7^SxRY{L~A&Xn^7+&5QD+u2^{%t=84 zB3N(_g+~R?C*>m6u5>;`b5g0YSvt0IF5<9rMnoHm1fWthD=Kr1pz1_`9Iq09|O&A8-BZBn$G{__!+gxj?a@rA8!g#OtIag>pCk^!@ijb_p z-^)maV0E_i?D1U#y8&O3wWL7<&c=|&K3-mLPAWAv&y1HyL2N!i!S~&txBlS7Q&(et zD(%$v0JhmvrIt+jI=rkYixXB_5U$8PXADPkt>kMOeblt9*1$SA<$Bh$rX)B=NysgP z!^Kv*zqlq=3J#Dwf~tl{js5r7H`{6Pz7;O_dTqro^)(7@;Mhc97H*Jvnm1-iqzMzE z-%jn_m3Vts#>M~&9xRCm6M4Hd7Dm$;*C(Vdx$fo{ghZ>U;XkMSgs35mxlKk+$jx&_ zO(HXRK&PfQ;E{+3&lb9apC~|yM*5+fqR9L@3^pL&ZMPY$$rGi~v}P(DDG&sBbQC0} zUD+-<#o6y~UqpNh1LxxMeH6|bFl3+UM?HID9{8-V+Daq+1`?G#HqpbgpFKauatA|9 znH;PS+m?W()UF)c7cj{KYrD@y?^ACJ)%%@mo7OKdnR2JUFB9_`zADDb3v773uJC08 z-r8)~2Mz3W_TEc*_rCa&g0w9mOe+A8oMCmT;f6L|hz%nka*cs&9Q>*ld-zGQe7+Pw zzS1t_xu21{i4J5m(^lW8o!^tmc9Ex{A7a9V)=(hMF~?qn7vz)>Hi!Vwv(nFgq{3jX_0JWzHd zD3hWsfnox!+&|i;E-Uyho|2lAxvydfvdYvv4ttd{S(aIwFMPhVB->XxoLpZvzbWfkPfwOe2vPW)hM!}@hZKgnKyfRIuc?X>kr|I zQPwYxD@BP2C3tZNM5boD0duKDWqnOjG&r(sq&Pg^3Pix&D@|Og>CARqyhyVjBC+nY zkF5gt6{t?ehz!cl?SB++FkQjSWCbbI&h5x>suCH;fbZerLZJ2&j2jNJ ziLOFQOrAQ&Mk^k`akz7E#?xx!ln|+ur)0{`;t%|ANIf#WvU6-=X2T8l3WMwZ>>){@ zB?6C0go&MQ<&O=USU!CjGDcBfU|WEu7+b09RrzesOhcc01d%WSZ`~^#FqGS@zwUKv zVBqz_cc{VSL9uID{ZRc7QnfQt`g8O#<#Ix|s^Mya;NlfKs3m2c+nhHPpjJEd9v%5; zQ{2v$Q}I1EQhbnj#~sk-=;8f&QVO*y&DGpOlv@}%qFMjW0p*AIW0k%Ds*C}0<4b1d zOGm!msdwwxGVe{}kKzFm>YIsftCti`PT3cPD*nsP1sGmfOXZXI1Co!TY=q8x>*y5} z`XkTYwpxjS$H7;KH@&l3?|aoH{S;_{J9!z1T+Gs9e+7-yk}bg)SvknujbiEmls09I zZW^)C(RtURgbY}Lz;AMpG-oOp-CF0E0e5>rvnt)x%=}nw;|^ti(ARl*LNo1B82PSd zB=Z4BrK|eb?_GWL^g;*d74_vNnbg;&edU2|RA;{xR0mjmGk*OhNEh~6d{r2n; zDN;6w1(>G4$Xc@;C1a!8E9eNN3j{Z2jclpYeUe4xBWPjx!V}4l{WzAe<+PASC``_7W4=ryN*h|3+V$IjS!)y{;l4|{!!_Zf2$x$a*l*tSCpH&^|(iLIgse~ z^~6Yf9#3FM1?B3{_$3^Lu3a4E%`+|1hiH8mg?CdLdC*L6WQ-l&&I_IMS2wqnYd(CBIlfdH>AIfvUQ~+)@NKzki~Kj_&ZDf9P*TaiYh!y* ze2JiGd-F8%f#N#&!poq>)f*AmiX1fex5M8)8Z8pOvMWF%Cqx3YysCh=L~NI^x>Maz zUYC%3jr~ehc|@EJ13vmXeKP71`CFZgG*`tg(gDvoo_+s0;rD3NciyxskrT#KG)QDY 
zc{!6XFKl3bhmEkQ)1Xx!ys?B$WL1+Y3sZ+Z8g1*be%6K;ZejdJ2r?_JJmB5 zde;XFjXSnbG^&y#8ja1F4rlqg;RpH0P*TJW5LhD!tmwzvaTtV5Q?z3CIaY@eyR}v( z=uzgkV{8IdJHDAEn`t^H>AU{EBItzJZV z<-_O6LK_qc=GCaqXqFAG#SZI6v=R1T(tew|vC|3D*4HG!hI9Qa1gjl{6F_;Cz>p`*$HWn=L_+A*PQ|dZz*8z zQ|DX9!PUwAc6}$^%9ERtaqvKOzQX=6VS5ei+QZM);)z;+m0a>tq^DOB<}7-=aCaWI zaeF*w@%_VBbT8(JzAAMzjdxARTu@fYOow*>84jJb2N=GNIn`79)0K zZn4cPCN-a&FOFraD7eVtG{i@eehAN4fGSUuQHGFEM$keYUeT4>UptHg>UCO=jc0n#GAVpo+PVQbz&Cd|%w7?-Wrz5;1lI z-4JJ;(=($kawDuv!+Ls~S3ZB@K!;!);p3Y2DVyl_n~IYXR2VSMY-@Dx z7v>^PTT{U#xlI|$nNpAJ&?!*5WRtAKPkIRsT)o&$Z2_0*%1EirSYy?vX%s0`Cq8UE zWyy!y8bE}Mr$M<yU3H8`zx?=AMVKf^JUJu-5lY1=^^zeV8YB3qdtfJ4O01Pu#N(6^Pt(q&^e zRI$0iEZZ4fab!V=%Iz4}&P8&=PrKki>4i0trXYWXl)ZoP2kkW9a$g3+nTbb;&z$UW zV8=zcQdf8Y$|?dhgjGWK?ML(%ys9u-gFGo3>e+>=J&*hxX{fbTgk(-NtJqO~U z2gEtYQBeURw?Ve>XgZ`7i2k}%fM`qO78~ui6r2;Yy{6yRu%?xwi{aU(`QBr9+rNx zU+g?eafD-RwUZN~n@P$qEK9;uL6X9_D*?{}hjJvYRW%~M zrlRvXU*U!v(s6Lp!?)Jv#Jq@rRXf>cT6Smd{2cZ?t4OjWnNQ0kedUAc7QyVwiqjOo zufCSUyhm9na!VJ*@goi}dXkGxhs8a15Xzq;d zXvD$yQpTw(x4%8PC=V}lRoR2)>_RR?;%&EI0nL+`u3e>bwYHosi+&PNT93=5n_+IS zhZwIHT_E%rx6NgF?YV3WlM49mkoVvZXtkn06-KG!cbqA->e?wE+0DSny^ZZ){(Hhz zq_cQ3!JB@rYvdP`zOB{!AsP&|GwE&e;b$$Ce8x3-%;%^G)`>4;9{eYieI#51E;bFv z%YB;`K}38(jyuI)QK!lhY9>g~UJxhKgdpq!BNk_qqYSJ0?|Z7pvo@{s+Cgp=NVJ=Dw3+3ozd zn_ujrx2)?Vv0wv=ZQUDbWYuG}5MDil5_IH^J>2zD^($<{Lm>>@!YQDk4P$antPH!Q z;>nRLPI>R0xkX~RL94IMD)ACrNccdE%+w;wF8{Fm0j9$|HLtu_;ULCdKpelzx-68l z1nS`FlBis;I6H)xv(2(%X7pONm3N@m=U?{E8^Ja%FD*6}IL%3#z;=V*%E!IODzXM2 zq`|Vsr5K;dETN9!~C%_tv|;)0U@Ir?wL|^#Wmc1hOJ4eq4y?Eh1$Sa(~_Q^X~ZpIHZ)5 z)}|f-vS-nGLi~7;pu#bRDz83d(6sEs_s&+cU!f=dROA)p+02||vx`7YSot`UP^M$9 z(j{J91WzVGw=iHKWU=M!f1>`nV`qri9fm@@9mDO#yh8y`AVA3%D(g={98bC=TKFs6 zn^!rCiK$*c&K&{OfZzl>?XHs5R=90prHKK2yCSU#7LyGKV8TcZHHtb#?Yb73LsFUR ziRPa#KX`R-~@3lu+=DMnPIS(@WF$sFApe-L+DoFLc zDDIm9%_JZr{p!f7om`z^%QzIJNPHQUzfj&L01ke)I;mmvnmX^u)svAt=oJ@W-}a1n zzX!4)^}b*JDPx0wVgUQUjhs;a@6nR~nJ5AKKL=3$e}+x|`-=bUkpBCMhChANf6uu5 
z+x?%O=s)?N-spe3|7Rus-|iI%|LG3+xBWl0_kZ#~HF!lCDCoaF0>qz}6dVl9f&8!4 F{{mS)ea-*? literal 0 HcmV?d00001 From 0659a9cad4b7cf68a2dbfdca2f1e660a14f07f8e Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jul 2023 22:26:51 -0700 Subject: [PATCH 14/41] Update test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3bd8cde..b4acccf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,7 +41,7 @@ jobs: run: | REMOTE=/app/nf-encyclopedia docker run -v $(pwd):${REMOTE} nf-encyclopedia:latest \ - pytest tests/unit_tests # -w ${REMOTE} + pytest ${REMOTE}/tests/unit_tests - name: Run System Tests run: | From 2b9ea7c5c120c978a1b4791d9354a1f4672adc4e Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 14:17:01 -0700 Subject: [PATCH 15/41] bumped ims conversion version --- modules/ims.nf | 2 +- subworkflows/encyclopedia.nf | 42 ++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/ims.nf b/modules/ims.nf index a537c9a..7e840cf 100644 --- a/modules/ims.nf +++ b/modules/ims.nf @@ -2,7 +2,7 @@ process ADD_IMS_INFO { publishDir "${params.result_dir}/${group}/blib", pattern: '*.ims.blib', failOnError: true label 'process_medium' - container 'ghcr.io/talusbio/flimsay:v0.2.0' + container 'ghcr.io/talusbio/flimsay:v0.4.0' input: val group diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index 9f7f15f..5a4889a 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -62,8 +62,8 @@ workflow PERFORM_QUANT { quant_files | transpose() | multiMap { qit -> - mzml: tuple qit[0], qit[1] - elib: qit[2] + mzml: tuple qit[0], qit[1] + elib: qit[2] } | set { ungrouped_files } @@ -86,26 +86,26 @@ workflow PERFORM_QUANT { // Only run group-wise global if needed. 
if ( local_only ) { - Channel.empty() | set { global_files } - Channel.empty() | set { msstats_files } - Channel.empty() | set { global_blib } + Channel.empty() | set { global_files } + Channel.empty() | set { msstats_files } + Channel.empty() | set { global_blib } } else { - // Do the global analysis - // Output is [group, peptides_txt, proteins_txt] - ENCYCLOPEDIA_AGGREGATE( - local_files, - dlib, - fasta, - params.encyclopedia.quant_suffix, - true // Align RTs - ) - | set { agg_outs } - - agg_outs.lib | - map { libe -> libe[1] } | - set { global_blib } - agg_outs.quant - | set { global_files } + // Do the global analysis + // Output is [group, peptides_txt, proteins_txt] + ENCYCLOPEDIA_AGGREGATE( + local_files, + dlib, + fasta, + params.encyclopedia.quant_suffix, + true // Align RTs + ) + | set { agg_outs } + + agg_outs.lib | + map { libe -> libe[1] } | + set { global_blib } + agg_outs.quant + | set { global_files } } emit: From 5f74e08026bb381c6287a2c7085628b18338da2f Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 14:24:56 -0700 Subject: [PATCH 16/41] updated msstats tests for resolutoin --- tests/unit_tests/test_msstats.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/test_msstats.py b/tests/unit_tests/test_msstats.py index 0a34bbd..2b18186 100644 --- a/tests/unit_tests/test_msstats.py +++ b/tests/unit_tests/test_msstats.py @@ -15,6 +15,9 @@ Path("QCPlot.pdf"), ] +""" +Note that the script fixture is defined at the bottom of this file. 
+""" def test_joins(msstats_input, script): """Test that the joins are made correctly""" @@ -217,7 +220,8 @@ def script(monkeypatch, tmp_path): """Set the working directory""" (tmp_path / "msstats").mkdir(exist_ok=True) (tmp_path / "results").mkdir(exist_ok=True) - script_path = Path("bin/msstats.R").resolve() + script_location = Path(__file__).parent / "../../bin" + script_path = (script_location / "msstats.R").resolve() monkeypatch.syspath_prepend(script_path) monkeypatch.chdir(tmp_path) return script_path From e6f583266c7076be2d3dc0d20087ec8d1c0ea3b6 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 14:32:44 -0700 Subject: [PATCH 17/41] fixed input piping to ims --- subworkflows/encyclopedia.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/encyclopedia.nf b/subworkflows/encyclopedia.nf index 5a4889a..dae9a65 100644 --- a/subworkflows/encyclopedia.nf +++ b/subworkflows/encyclopedia.nf @@ -91,7 +91,10 @@ workflow PERFORM_QUANT { Channel.empty() | set { global_blib } } else { // Do the global analysis - // Output is [group, peptides_txt, proteins_txt] + // Output is [ + // lib: [group, elib, blib, log, summary.csv] + // quant: [group, peptides_txt, proteins_txt] + // ] ENCYCLOPEDIA_AGGREGATE( local_files, dlib, @@ -102,7 +105,7 @@ workflow PERFORM_QUANT { | set { agg_outs } agg_outs.lib | - map { libe -> libe[1] } | + map { libe -> libe[2] } | set { global_blib } agg_outs.quant | set { global_files } From 61c6b0ea1824512b524e462eb9196e089b0c047d Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 14:47:33 -0700 Subject: [PATCH 18/41] changed log handling in skyline steps --- modules/skyline.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 9747e2b..318acb5 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -60,8 +60,8 @@ process SKYLINE_IMPORT_DATA { wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ --import-no-join \ - --log-file="${raw_file.baseName}.log" \ --import-file="${raw_file}" \ + 2>&1 | tee "${raw_file.baseName}.log" """ stub: @@ -95,12 +95,12 @@ process SKYLINE_MERGE_RESULTS { wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ - --log-file="skyline-merge.log" \ ${import_files_params} \ --out="final.sky" \ --save \ --share-zip="final.sky.zip" \ - --share-type="complete" + --share-type="complete" \ + 2>&1 | tee "skyline-merge.log" """ stub: From 427ff6316fad884d3a2361d16a2b6c00d02bf788 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 14:59:42 -0700 Subject: [PATCH 19/41] changed log handling in skylines first step --- modules/skyline.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 318acb5..1bc5c8c 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -19,13 +19,13 @@ process SKYLINE_ADD_LIB { wine SkylineCmd \ --in="${skyline_template_zipfile.baseName}" \ - --log-file=skyline_add_library.log \ --import-fasta="${fasta}" \ --add-library-path="${blib}" \ --out="results.sky" \ --save \ --share-zip="results.sky.zip" \ - --share-type="complete" + --share-type="complete" \ + 2>&1 | tee skyline_add_library.log \ """ stub: From 17c34a60534a958596a04489a72f8dc5bf51fb58 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 15:18:05 -0700 Subject: [PATCH 20/41] updated position in skyline adds --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 93947e6..470fb06 100644 --- a/main.nf +++ b/main.nf @@ -132,7 +132,7 @@ workflow { ADD_IMS_INFO(groups, quant_results.blib) | set { blib } - SKYLINE_ADD_LIB(skyline_empty_template, blib, fasta) + SKYLINE_ADD_LIB(skyline_empty_template, fasta, blib) | set { skyline_template_zipfile } // This will generate a skyd for every raw data file From f9baf4ad6889f149a04de8ea733eecbdc2296ba2 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 15:45:38 -0700 Subject: [PATCH 21/41] handled decompression --- modules/skyline.nf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 1bc5c8c..a31d13a 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -56,11 +56,17 @@ process SKYLINE_IMPORT_DATA { script: """ unzip ${skyline_zipfile} + if [[ ${raw_file} == *.tar.d ]] ; then + tar -xvf ${raw_file} + raw_file=$(ls *.d) + else + local_rawfile=${raw_file} + fi wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ --import-no-join \ - --import-file="${raw_file}" \ + --import-file="${local_rawfile}" \ 2>&1 | tee "${raw_file.baseName}.log" """ From 777f65fd63273d25dc583c9bbcb30c4b034031ee Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 15:53:59 -0700 Subject: [PATCH 22/41] handled decompression on grouping stage --- modules/skyline.nf | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index a31d13a..269636e 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -95,9 +95,18 @@ process SKYLINE_MERGE_RESULTS { path("skyline-merge.log"), emit: log script: - import_files_params = "--import-file=${(mzml_files as List).collect{ "/tmp/" + file(it).name }.join(' --import-file=')}" """ unzip ${skyline_zipfile} + for f in *.tar.d ]] ; do + tar -xvf ${f} + done + + local_files=$(ls *.raw *.mzml *.mzML) + import_files_params="" + for f in $local_files ; do + import_files_params=" ${import_files_params} --import-file=${f}" + done + wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ From 9529b39ffb8128ca4f037b25c24f7b04dc899e86 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 16:05:38 -0700 Subject: [PATCH 23/41] escaped bash variables --- modules/skyline.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 269636e..aef810f 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -58,7 +58,7 @@ process SKYLINE_IMPORT_DATA { unzip ${skyline_zipfile} if [[ ${raw_file} == *.tar.d ]] ; then tar -xvf ${raw_file} - raw_file=$(ls *.d) + raw_file=\$(ls *.d) else local_rawfile=${raw_file} fi @@ -66,7 +66,7 @@ process SKYLINE_IMPORT_DATA { wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ --import-no-join \ - --import-file="${local_rawfile}" \ + --import-file="\${local_rawfile}" \ 2>&1 | tee "${raw_file.baseName}.log" """ @@ -98,19 +98,19 @@ process SKYLINE_MERGE_RESULTS { """ unzip ${skyline_zipfile} for f in *.tar.d ]] ; do - tar -xvf ${f} + tar -xvf \${f} done - local_files=$(ls *.raw *.mzml *.mzML) + local_files=\$(ls *.raw *.mzml *.mzML) import_files_params="" - for f in $local_files ; do - 
import_files_params=" ${import_files_params} --import-file=${f}" + for f in \$local_files ; do + import_files_params=" \${import_files_params} --import-file=\${f}" done wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ - ${import_files_params} \ + \${import_files_params} \ --out="final.sky" \ --save \ --share-zip="final.sky.zip" \ From ea299370b17130618df991047562dbf0d387b2ad Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 16:16:42 -0700 Subject: [PATCH 24/41] bugfix minor typo on file type --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index aef810f..f871e15 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -56,7 +56,7 @@ process SKYLINE_IMPORT_DATA { script: """ unzip ${skyline_zipfile} - if [[ ${raw_file} == *.tar.d ]] ; then + if [[ ${raw_file} == *.d.tar ]] ; then tar -xvf ${raw_file} raw_file=\$(ls *.d) else From 88b9d496e2de65d52178c013c414992a2047b81c Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 16:26:47 -0700 Subject: [PATCH 25/41] bugfix minor typo on file type --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index f871e15..f037baa 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -58,7 +58,7 @@ process SKYLINE_IMPORT_DATA { unzip ${skyline_zipfile} if [[ ${raw_file} == *.d.tar ]] ; then tar -xvf ${raw_file} - raw_file=\$(ls *.d) + local_rawfile=\$(ls *.d) else local_rawfile=${raw_file} fi From 1241330f229f53654cf887c5c99a3d693ac41bf4 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 21:41:11 -0700 Subject: [PATCH 26/41] changed way to list files in bash --- modules/skyline.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index f037baa..7776a8a 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -58,7 +58,7 @@ process SKYLINE_IMPORT_DATA { unzip ${skyline_zipfile} if [[ ${raw_file} == *.d.tar ]] ; then tar -xvf ${raw_file} - local_rawfile=\$(ls *.d) + local_rawfile=\$(find \${PWD} -d -name "*.d") else local_rawfile=${raw_file} fi @@ -101,9 +101,8 @@ process SKYLINE_MERGE_RESULTS { tar -xvf \${f} done - local_files=\$(ls *.raw *.mzml *.mzML) import_files_params="" - for f in \$local_files ; do + for f in *.{d,mzml,mzML} ; do import_files_params=" \${import_files_params} --import-file=\${f}" done From ca9ecc7ba29b07554839540e72ec486c8f43e30e Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 22:11:45 -0700 Subject: [PATCH 27/41] fixed yet another bug in the bash side of the workflow --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 7776a8a..ec077f2 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -97,7 +97,7 @@ process SKYLINE_MERGE_RESULTS { script: """ unzip ${skyline_zipfile} - for f in *.tar.d ]] ; do + for f in *.d.tar ; do tar -xvf \${f} done From 585b568c92de768147de2e1328111f3acfafdf49 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 22:37:01 -0700 Subject: [PATCH 28/41] changed val to path in skyline step --- main.nf | 7 ++----- modules/skyline.nf | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 470fb06..679b198 100644 --- a/main.nf +++ b/main.nf @@ -142,13 +142,10 @@ workflow { ) | set { skyline_import_results } - raw_quant_files = raw_quant_files.collect() - skyd_files = skyline_import_results.skyd_file.collect() - SKYLINE_MERGE_RESULTS( skyline_template_zipfile.skyline_zipfile, - skyd_files, - raw_quant_files, + skyd_files.collect(), + raw_quant_files.collect(), ) | set { skyline_merge_results } diff --git a/modules/skyline.nf b/modules/skyline.nf index ec077f2..7446062 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -88,7 +88,7 @@ process SKYLINE_MERGE_RESULTS { input: path skyline_zipfile path '*.skyd' - val raw_files + path raw_files output: path("final.sky.zip"), emit: final_skyline_zipfile From 4c7d79f72d97e544ec7ca8f2decb0d03fad429ba Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Tue, 25 Jul 2023 23:20:15 -0700 Subject: [PATCH 29/41] added debugging prints --- modules/skyline.nf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/skyline.nf b/modules/skyline.nf index 7446062..a524306 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -96,6 +96,12 @@ process SKYLINE_MERGE_RESULTS { script: """ + + echo ${raw_files} + + echo "Directory status >>>>" + ls -lctha # For debugging ... + unzip ${skyline_zipfile} for f in *.d.tar ; do tar -xvf \${f} @@ -106,6 +112,8 @@ process SKYLINE_MERGE_RESULTS { import_files_params=" \${import_files_params} --import-file=\${f}" done + echo "Import file params >>>" + echo \${import_files_params} wine SkylineCmd \ --in="${skyline_zipfile.baseName}" \ From efd04ef79c3ff8dc86d56c61d7de048f84070ac6 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Tue, 25 Jul 2023 23:28:24 -0700 Subject: [PATCH 30/41] yet another fix --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 679b198..11a73d3 100644 --- a/main.nf +++ b/main.nf @@ -144,7 +144,7 @@ workflow { SKYLINE_MERGE_RESULTS( skyline_template_zipfile.skyline_zipfile, - skyd_files.collect(), + skyline_import_results.skyd_file.collect(), raw_quant_files.collect(), ) | set { skyline_merge_results } From 829081b785a9f7eab220cb395e940e78e545186d Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 26 Jul 2023 00:28:25 -0700 Subject: [PATCH 31/41] added more debugging info and added handling of raw files --- modules/skyline.nf | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index a524306..d202119 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -97,19 +97,37 @@ process SKYLINE_MERGE_RESULTS { script: """ + echo "Input Raw files" echo ${raw_files} echo "Directory status >>>>" ls -lctha # For debugging ... + echo "Unzipping skyline template file" unzip ${skyline_zipfile} - for f in *.d.tar ; do - tar -xvf \${f} - done import_files_params="" - for f in *.{d,mzml,mzML} ; do - import_files_params=" \${import_files_params} --import-file=\${f}" + + if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then + for f in *.d.tar ; do + echo "Decompressing \${f}" + tar -xvf \${f} + done + + for f in *.d ; do + import_files_params=" \${import_files_params} --import-file=\${f}" + done + else + fi + + echo "Import file params >>>" + echo \${import_files_params} + + for ftype in raw mzml mzML; do + echo ">>> Looking for \${ftype} files" + for f in \$(find \${PWD} -type d -name "*.\${ftype}"); do + import_files_params=" \${import_files_params} --import-file=\${f}" + done done echo "Import file params >>>" From 5b76bfe23e078f6c0c605887bdaa8f25a50bed2e Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Wed, 26 Jul 2023 00:41:48 -0700 Subject: [PATCH 32/41] handled empty else clause --- modules/skyline.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index d202119..1401861 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -108,16 +108,19 @@ process SKYLINE_MERGE_RESULTS { import_files_params="" - if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then + if [[ \$(find \${PWD} -type f -name "*.d.tar") ]] ; then for f in *.d.tar ; do echo "Decompressing \${f}" tar -xvf \${f} done + else + echo "No compressed .d files found" + fi + if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then for f in *.d ; do import_files_params=" \${import_files_params} --import-file=\${f}" done - else fi echo "Import file params >>>" From 362dadbda2366b1d564293998d5356c725a3fd75 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 26 Jul 2023 09:22:59 -0700 Subject: [PATCH 33/41] added a lot of extra params --- modules/skyline.nf | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 1401861..435ef88 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -25,6 +25,17 @@ process SKYLINE_ADD_LIB { --save \ --share-zip="results.sky.zip" \ --share-type="complete" \ + --tran-precursor-ion-charges="2, 3 ,4" \ + --tran-product-ion-charges="1,2" \ + --tran-product-ion-types="b, y, p" \ + --tran-use-dia-exclusion \ + --library-pick-product-ions="all_plus" \ + --tran-product-start-ion="ion 2" \ + --tran-product-end-ion="last ion - 2" \ + --associate-protein-group-proteins \ + --full-scan-product-res=10.0 \ + --full-scan-product-analyzer=centroided \ + --full-scan-acquisition-method="DIA" \ 2>&1 | tee skyline_add_library.log \ """ @@ -59,8 +70,10 @@ process SKYLINE_IMPORT_DATA { if [[ ${raw_file} == *.d.tar ]] ; then tar -xvf ${raw_file} local_rawfile=\$(find \${PWD} -d -name "*.d") + import_extra_params=" 
--ims-library-res=30 --full-scan-isolation-scheme=\${local_rawfile}" else local_rawfile=${raw_file} + import_extra_params=" --full-scan-isolation-scheme=\${local_rawfile}" fi wine SkylineCmd \ @@ -96,7 +109,6 @@ process SKYLINE_MERGE_RESULTS { script: """ - echo "Input Raw files" echo ${raw_files} @@ -107,6 +119,7 @@ process SKYLINE_MERGE_RESULTS { unzip ${skyline_zipfile} import_files_params="" + import_extra_params="" if [[ \$(find \${PWD} -type f -name "*.d.tar") ]] ; then for f in *.d.tar ; do @@ -120,6 +133,7 @@ process SKYLINE_MERGE_RESULTS { if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then for f in *.d ; do import_files_params=" \${import_files_params} --import-file=\${f}" + import_extra_params=" --ims-library-res=30 --full-scan-isolation-scheme=\${f}" done fi @@ -128,7 +142,7 @@ process SKYLINE_MERGE_RESULTS { for ftype in raw mzml mzML; do echo ">>> Looking for \${ftype} files" - for f in \$(find \${PWD} -type d -name "*.\${ftype}"); do + for f in \$(find \${PWD} -type f -name "*.\${ftype}"); do import_files_params=" \${import_files_params} --import-file=\${f}" done done @@ -143,7 +157,13 @@ process SKYLINE_MERGE_RESULTS { --save \ --share-zip="final.sky.zip" \ --share-type="complete" \ + --reintegrate-model-name="reintegration_res" \ + --reintegrate-create-model \ + --full-scan-filter-tolerance=2 \ 2>&1 | tee "skyline-merge.log" + + echo "Directory status >>>>" + ls -lctha # For debugging ... """ stub: From b5cd217239158ba63b02a863e776761ee234a713 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Wed, 26 Jul 2023 09:47:12 -0700 Subject: [PATCH 34/41] added unused parameter --- modules/skyline.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/skyline.nf b/modules/skyline.nf index 435ef88..4f75ba2 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -80,6 +80,7 @@ process SKYLINE_IMPORT_DATA { --in="${skyline_zipfile.baseName}" \ --import-no-join \ --import-file="\${local_rawfile}" \ + \${import_extra_params} \ 2>&1 | tee "${raw_file.baseName}.log" """ @@ -160,6 +161,7 @@ process SKYLINE_MERGE_RESULTS { --reintegrate-model-name="reintegration_res" \ --reintegrate-create-model \ --full-scan-filter-tolerance=2 \ + $\{import_extra_params} \ 2>&1 | tee "skyline-merge.log" echo "Directory status >>>>" From 39bfc359070c7cd37bac3f0015c7902ade5cd477 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 26 Jul 2023 10:45:14 -0700 Subject: [PATCH 35/41] fixed name in a parameter --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 4f75ba2..005ee0e 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -28,7 +28,7 @@ process SKYLINE_ADD_LIB { --tran-precursor-ion-charges="2, 3 ,4" \ --tran-product-ion-charges="1,2" \ --tran-product-ion-types="b, y, p" \ - --tran-use-dia-exclusion \ + --tran-use-dia-window-exclusion \ --library-pick-product-ions="all_plus" \ --tran-product-start-ion="ion 2" \ --tran-product-end-ion="last ion - 2" \ From f70505b1c02f3edbccbd4b0cd2be8ef5aa8623a2 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Wed, 26 Jul 2023 11:01:49 -0700 Subject: [PATCH 36/41] fixed name in a parameter --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 005ee0e..55daddd 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -161,7 +161,7 @@ process SKYLINE_MERGE_RESULTS { --reintegrate-model-name="reintegration_res" \ --reintegrate-create-model \ --full-scan-filter-tolerance=2 \ - $\{import_extra_params} \ + \${import_extra_params} \ 2>&1 | tee "skyline-merge.log" echo "Directory status >>>>" From 698b31d993d01d99bd58a5f62b5f5afa5231f876 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 26 Jul 2023 11:17:21 -0700 Subject: [PATCH 37/41] fixed name in a parameter --- modules/skyline.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 55daddd..159279f 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -32,7 +32,8 @@ process SKYLINE_ADD_LIB { --library-pick-product-ions="all_plus" \ --tran-product-start-ion="ion 2" \ --tran-product-end-ion="last ion - 2" \ - --associate-protein-group-proteins \ + --associate-proteins-minimal-protein-list \ + --associate-proteins-group-proteins \ --full-scan-product-res=10.0 \ --full-scan-product-analyzer=centroided \ --full-scan-acquisition-method="DIA" \ From 091a088cb107b5404d04dc36b16b6b64c432cd39 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Wed, 26 Jul 2023 12:30:52 -0700 Subject: [PATCH 38/41] moved parameters from template to import --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 159279f..506a939 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -36,7 +36,6 @@ process SKYLINE_ADD_LIB { --associate-proteins-group-proteins \ --full-scan-product-res=10.0 \ --full-scan-product-analyzer=centroided \ - --full-scan-acquisition-method="DIA" \ 2>&1 | tee skyline_add_library.log \ """ @@ -81,6 +80,7 @@ process SKYLINE_IMPORT_DATA { --in="${skyline_zipfile.baseName}" \ --import-no-join \ --import-file="\${local_rawfile}" \ + --full-scan-acquisition-method="DIA" \ \${import_extra_params} \ 2>&1 | tee "${raw_file.baseName}.log" """ From d30a1d056e58dd3990cb8ad66e3ee27fd0c334f1 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 26 Jul 2023 12:58:05 -0700 Subject: [PATCH 39/41] added rt filter --- modules/skyline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 506a939..578666b 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -36,6 +36,7 @@ process SKYLINE_ADD_LIB { --associate-proteins-group-proteins \ --full-scan-product-res=10.0 \ --full-scan-product-analyzer=centroided \ + --full-scan-rt-filter-tolerance=2 \ 2>&1 | tee skyline_add_library.log \ """ @@ -161,7 +162,6 @@ process SKYLINE_MERGE_RESULTS { --share-type="complete" \ --reintegrate-model-name="reintegration_res" \ --reintegrate-create-model \ - --full-scan-filter-tolerance=2 \ \${import_extra_params} \ 2>&1 | tee "skyline-merge.log" From f433d2b44ed30675f93be7c8c76a685ccfc9f3a1 Mon Sep 17 00:00:00 2001 From: "J.
Sebastian Paez" Date: Fri, 28 Jul 2023 15:34:56 -0700 Subject: [PATCH 40/41] moved ims params to sky template creation --- modules/skyline.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index 578666b..eb9e0f7 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -1,3 +1,6 @@ + +// Code modified from + process SKYLINE_ADD_LIB { publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' label 'process_medium' @@ -37,6 +40,7 @@ process SKYLINE_ADD_LIB { --full-scan-product-res=10.0 \ --full-scan-product-analyzer=centroided \ --full-scan-rt-filter-tolerance=2 \ + --ims-library-res=30 \ 2>&1 | tee skyline_add_library.log \ """ @@ -71,7 +75,7 @@ process SKYLINE_IMPORT_DATA { if [[ ${raw_file} == *.d.tar ]] ; then tar -xvf ${raw_file} local_rawfile=\$(find \${PWD} -d -name "*.d") - import_extra_params=" --ims-library-res=30 --full-scan-isolation-scheme=\${local_rawfile}" + import_extra_params=" --full-scan-isolation-scheme=\${local_rawfile}" else local_rawfile=${raw_file} import_extra_params=" --full-scan-isolation-scheme=\${local_rawfile}" @@ -136,7 +140,7 @@ process SKYLINE_MERGE_RESULTS { if [[ \$(find \${PWD} -type d -name "*.d") ]] ; then for f in *.d ; do import_files_params=" \${import_files_params} --import-file=\${f}" - import_extra_params=" --ims-library-res=30 --full-scan-isolation-scheme=\${f}" + import_extra_params=" --full-scan-isolation-scheme=\${f}" done fi From ddae2d6417f08fc601c63ff1a0daa296eb7f877e Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 28 Jul 2023 16:16:16 -0700 Subject: [PATCH 41/41] added mem logging and decoys --- modules/skyline.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/skyline.nf b/modules/skyline.nf index eb9e0f7..3e76334 100644 --- a/modules/skyline.nf +++ b/modules/skyline.nf @@ -1,5 +1,5 @@ -// Code modified from +// Code modified from https://github.com/mriffle/nf-teirex-dia/blob/main/modules/skyline.nf process SKYLINE_ADD_LIB { publishDir "${params.result_dir}/skyline/", failOnError: true, mode: 'copy' @@ -41,6 +41,9 @@ process SKYLINE_ADD_LIB { --full-scan-product-analyzer=centroided \ --full-scan-rt-filter-tolerance=2 \ --ims-library-res=30 \ + --decoys-add=shuffle \ + --timestamp \ + --memstamp \ 2>&1 | tee skyline_add_library.log \ """ @@ -86,6 +89,8 @@ process SKYLINE_IMPORT_DATA { --import-no-join \ --import-file="\${local_rawfile}" \ --full-scan-acquisition-method="DIA" \ + --timestamp \ + --memstamp \ \${import_extra_params} \ 2>&1 | tee "${raw_file.baseName}.log" """ @@ -166,6 +171,8 @@ process SKYLINE_MERGE_RESULTS { --share-type="complete" \ --reintegrate-model-name="reintegration_res" \ --reintegrate-create-model \ + --timestamp \ + --memstamp \ \${import_extra_params} \ 2>&1 | tee "skyline-merge.log"