diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100755 index 00000000..26300189 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,236 @@ +name: test +on: + pull_request: + branches: [main, develop] + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + prettier: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + + - name: Install NodeJS + uses: actions/setup-node@v3 + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check . + + editorconfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-node@v3 + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) + + pytest-changes: + name: pytest-changes + runs-on: ubuntu-latest + outputs: + # Expose matched filters as job 'modules' output variable + modules: ${{ steps.filter.outputs.changes }} + steps: + - uses: actions/checkout@v3 + + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: "tests/config/pytest_modules.yml" + + nf-core-lint: + runs-on: ubuntu-20.04 + name: nf-core-lint + needs: [pytest-changes] + if: needs.pytest-changes.outputs.modules != '[]' + strategy: + fail-fast: false + matrix: + tags: ["${{ fromJson(needs.pytest-changes.outputs.modules) }}"] + exclude: + - tags: "nf-test" + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Install pip + run: python -m pip install --upgrade pip + + - name: Install nf-core tools development version + run: python -m pip install --upgrade --force-reinstall git+https://github.com/nf-core/tools.git@dev + + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v1 + + - name: Lint module ${{ matrix.tags }} + run: nf-core modules lint ${{ matrix.tags }} + if: ${{ !startsWith(matrix.tags, 'subworkflows/') }} + + - name: Remove substring from matrix.tags + id: remove_substring + run: echo subworkflow_names=$(echo "${{ matrix.tags }}" | sed 's/subworkflows\///g') >> $GITHUB_OUTPUT + + - name: Lint subworkflow ${{ matrix.tags }} + run: nf-core subworkflows lint ${{ steps.remove_substring.outputs.subworkflow_names }} + if: startsWith( matrix.tags, 'subworkflows/' ) + + pytest: + runs-on: ubuntu-20.04 + name: pytest + needs: [pytest-changes] + if: needs.pytest-changes.outputs.modules != '[]' + strategy: + fail-fast: false + matrix: + tags: ["${{ fromJson(needs.pytest-changes.outputs.modules) }}"] + profile: ["docker", "singularity", "conda"] + env: + NXF_ANSI_LOG: false + SENTIEON_LICENSE_BASE64: ${{ secrets.SENTIEON_LICENSE_BASE64 }} + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install Python dependencies + run: python -m pip install --upgrade pip pytest-workflow cryptography + + - name: Setup Nextflow ${{ matrix.NXF_VER }} + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v7 + with: + singularity-version: 3.7.1 + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + channels: conda-forge,bioconda,defaults + python-version: ${{ matrix.python-version }} + + - name: Conda setup + if: matrix.profile == 'conda' + run: | + conda clean -a + conda install -n base conda-libmamba-solver + conda config --set solver libmamba + + # Set up secrets + - name: Set up nextflow secrets + if: env.SENTIEON_LICENSE_BASE64 != null + run: | + nextflow secrets set SENTIEON_LICENSE_BASE64 ${{ secrets.SENTIEON_LICENSE_BASE64 }} + nextflow secrets set SENTIEON_AUTH_MECH_BASE64 ${{ secrets.SENTIEON_AUTH_MECH_BASE64 }} + SENTIEON_ENCRYPTION_KEY=$(echo -n "${{ secrets.ENCRYPTION_KEY_BASE64 }}" | base64 -d) + SENTIEON_LICENSE_MESSAGE=$(echo -n "${{ secrets.LICENSE_MESSAGE_BASE64 }}" | base64 -d) + SENTIEON_AUTH_DATA=$(python tests/modules/nf-core/sentieon/license_message.py encrypt --key "$SENTIEON_ENCRYPTION_KEY" --message "$SENTIEON_LICENSE_MESSAGE") + SENTIEON_AUTH_DATA_BASE64=$(echo -n "$SENTIEON_AUTH_DATA" | base64 -w 0) + nextflow secrets set SENTIEON_AUTH_DATA_BASE64 $SENTIEON_AUTH_DATA_BASE64 + + # Test the module + - name: Run pytest-workflow + # only use one thread for pytest-workflow to avoid race condition on conda cache. + run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof --git-aware --color=yes + + - name: Output log on failure + if: failure() + run: | + sudo apt-get update > /dev/null + sudo apt-get install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} + + - name: Setting global variables + uses: actions/github-script@v6 + id: parsed + with: + script: | + return '${{ matrix.tags }}'.toLowerCase().replaceAll(/\//g, '-').trim('-').trim('"') + result-encoding: string + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }}-${{ steps.parsed.outputs.result }} + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + !/home/runner/pytest_workflow_*/*/work/conda + !/home/runner/pytest_workflow_*/*/work/singularity + !${{ github.workspace }}/.singularity + + confirm-pass: + runs-on: ubuntu-latest + needs: [prettier, editorconfig, pytest-changes, nf-core-lint, pytest] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/.nf-core.yml b/.nf-core.yml index 23f2f93f..b56490ce 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,2 +1,2 @@ repository_type: modules -org_path: nf-core-test +org_path: mskcc diff --git a/modules/mskcc/bwa/index/main.nf b/modules/mskcc/bwa/index/main.nf new file mode 100644 index 00000000..c30d194d --- /dev/null +++ b/modules/mskcc/bwa/index/main.nf @@ -0,0 +1,53 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda "bioconda::bwa=0.7.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + """ + mkdir bwa + + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/bwa/index/meta.yml b/modules/mskcc/bwa/index/meta.yml new file mode 100644 index 00000000..2c6cfcd7 --- /dev/null +++ b/modules/mskcc/bwa/index/meta.yml @@ -0,0 +1,42 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/mskcc/bwa/mem/main.nf b/modules/mskcc/bwa/mem/main.nf new file mode 100644 index 00000000..8ba99dfd --- /dev/null +++ b/modules/mskcc/bwa/mem/main.nf @@ -0,0 +1,55 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/bwa/mem/meta.yml b/modules/mskcc/bwa/mem/meta.yml new file mode 100644 index 00000000..62357bf8 --- /dev/null +++ b/modules/mskcc/bwa/mem/meta.yml @@ -0,0 +1,55 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/mskcc/gatk4/applybqsr/main.nf b/modules/mskcc/gatk4/applybqsr/main.nf new file mode 100644 index 00000000..e5e6bf99 --- /dev/null +++ b/modules/mskcc/gatk4/applybqsr/main.nf @@ -0,0 +1,51 @@ +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSR \\ + --input $input \\ + --output ${prefix}.${input.getExtension()} \\ + --reference $fasta \\ + --bqsr-recal-file $bqsr_table \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/applybqsr/meta.yml b/modules/mskcc/gatk4/applybqsr/meta.yml new file mode 100644 index 00000000..2085fa97 --- /dev/null +++ b/modules/mskcc/gatk4/applybqsr/meta.yml @@ -0,0 +1,74 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/mskcc/gatk4/applybqsrspark/main.nf b/modules/mskcc/gatk4/applybqsrspark/main.nf new file mode 100644 index 00000000..7a4c29bb --- /dev/null +++ b/modules/mskcc/gatk4/applybqsrspark/main.nf @@ -0,0 +1,51 @@ +process GATK4_APPLYBQSR_SPARK { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.3.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk \\ + --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSRSpark \\ + --input $input \\ + --output ${prefix}.${input.getExtension()} \\ + --reference $fasta \\ + --bqsr-recal-file $bqsr_table \\ + $interval_command \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/applybqsrspark/meta.yml b/modules/mskcc/gatk4/applybqsrspark/meta.yml new file mode 100644 index 00000000..b253fc78 --- /dev/null +++ b/modules/mskcc/gatk4/applybqsrspark/meta.yml @@ -0,0 +1,76 @@ +name: gatk4_applybqsr_spark +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4 + - spark +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/mskcc/gatk4/baserecalibrator/main.nf b/modules/mskcc/gatk4/baserecalibrator/main.nf new file mode 100644 index 00000000..5375289a --- /dev/null +++ b/modules/mskcc/gatk4/baserecalibrator/main.nf @@ -0,0 +1,53 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibrator \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/baserecalibrator/meta.yml b/modules/mskcc/gatk4/baserecalibrator/meta.yml new file mode 100644 index 00000000..db4fecfc --- /dev/null +++ b/modules/mskcc/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,76 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4 + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/mskcc/gatk4/baserecalibratorspark/main.nf b/modules/mskcc/gatk4/baserecalibratorspark/main.nf new file mode 100644 index 00000000..6db088bb --- /dev/null +++ b/modules/mskcc/gatk4/baserecalibratorspark/main.nf @@ -0,0 +1,52 @@ +process GATK4_BASERECALIBRATOR_SPARK { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibratorSpark \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/baserecalibratorspark/meta.yml b/modules/mskcc/gatk4/baserecalibratorspark/meta.yml new file mode 100644 index 00000000..d175ca13 --- /dev/null +++ b/modules/mskcc/gatk4/baserecalibratorspark/meta.yml @@ -0,0 +1,77 @@ +name: gatk4_baserecalibrator_spark +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4 + - sort + - spark +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/mskcc/gatk4/createsequencedictionary/main.nf b/modules/mskcc/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..3e4efdd9 --- /dev/null +++ b/modules/mskcc/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,52 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/createsequencedictionary/meta.yml b/modules/mskcc/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..9b8b8c89 --- /dev/null +++ b/modules/mskcc/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,40 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/mskcc/gatk4/markduplicates/main.nf b/modules/mskcc/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..e4c01f9a --- /dev/null +++ b/modules/mskcc/gatk4/markduplicates/main.nf @@ -0,0 +1,68 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0 bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . \\ + ${reference} \\ + $args + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/markduplicates/meta.yml b/modules/mskcc/gatk4/markduplicates/meta.yml new file mode 100644 index 00000000..d3e75505 --- /dev/null +++ b/modules/mskcc/gatk4/markduplicates/meta.yml @@ -0,0 +1,73 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4 + - markduplicates + - sort +tools: + - gatk4: + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bai: + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - crai: + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/mskcc/gatk4/markduplicatesspark/main.nf b/modules/mskcc/gatk4/markduplicatesspark/main.nf new file mode 100644 index 00000000..f318ed50 --- /dev/null +++ b/modules/mskcc/gatk4/markduplicatesspark/main.nf @@ -0,0 +1,50 @@ +process GATK4_MARKDUPLICATES_SPARK { + tag "$meta.id" + label 'process_high' + + conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" + container "nf-core/gatk:4.4.0.0" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + path dict + + output: + tuple val(meta), path("${prefix}"), emit: output + tuple val(meta), path("${prefix}.bai"), emit: bam_index, optional:true + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def input_list = bam.collect{"--input $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicatesSpark \\ + $input_list \\ + --output $prefix \\ + --reference $fasta \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/mskcc/gatk4/markduplicatesspark/meta.yml b/modules/mskcc/gatk4/markduplicatesspark/meta.yml new file mode 100644 index 00000000..c9bb263a --- /dev/null +++ b/modules/mskcc/gatk4/markduplicatesspark/meta.yml @@ -0,0 +1,67 @@ +name: gatk4_markduplicates_spark +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4 + - markduplicates + - sort + - spark +tools: + - gatk4: + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: Marked duplicates BAM/CRAM file + pattern: "*.{bam,cram}" + - bam_index: + type: file + description: Optional BAM index file + pattern: "*.bai" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/mskcc/samtools/faidx/main.nf b/modules/mskcc/samtools/faidx/main.nf new file mode 100644 index 00000000..59ed3088 --- /dev/null +++ b/modules/mskcc/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/samtools/faidx/meta.yml b/modules/mskcc/samtools/faidx/meta.yml new file mode 100644 index 00000000..957b25e5 --- /dev/null +++ b/modules/mskcc/samtools/faidx/meta.yml @@ -0,0 +1,57 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/mskcc/samtools/index/main.nf b/modules/mskcc/samtools/index/main.nf new file mode 100644 index 00000000..0b20aa4b --- /dev/null +++ b/modules/mskcc/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/mskcc/samtools/index/meta.yml b/modules/mskcc/samtools/index/meta.yml new file mode 100644 index 00000000..8bd2fa6f --- /dev/null +++ b/modules/mskcc/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core-test/.gitkeep b/modules/nf-core-test/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/nf-core-test/fakeqc/main.nf b/modules/nf-core-test/fakeqc/main.nf deleted file mode 100644 index 8b137891..00000000 --- a/modules/nf-core-test/fakeqc/main.nf +++ /dev/null @@ -1 +0,0 @@ - diff --git a/modules/nf-core-test/fakeqc/meta.yml b/modules/nf-core-test/fakeqc/meta.yml deleted file mode 100644 index 8b137891..00000000 --- a/modules/nf-core-test/fakeqc/meta.yml +++ /dev/null @@ -1 +0,0 @@ - diff --git a/modules/nf-core-test/fastqc/main.nf b/modules/nf-core-test/fastqc/main.nf deleted file mode 100644 index 7777e0aa..00000000 --- a/modules/nf-core-test/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core-test/fastqc/meta.yml b/modules/nf-core-test/fastqc/meta.yml deleted file mode 100644 index 4da5bb5a..00000000 --- a/modules/nf-core-test/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core-test/multiqc/main.nf b/modules/nf-core-test/multiqc/main.nf deleted file mode 100644 index c6d1fe52..00000000 --- a/modules/nf-core-test/multiqc/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files, stageAs: "?/*" - tuple path(multiqc_config), path(multiqc_logo) - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def config = multiqc_config ? "--config $multiqc_config" : '' - """ - multiqc \\ - --force \\ - $config \\ - $args \\ - . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ - - stub: - """ - touch multiqc_data - touch multiqc_plots - touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core-test/multiqc/meta.yml b/modules/nf-core-test/multiqc/meta.yml deleted file mode 100644 index d385b4d2..00000000 --- a/modules/nf-core-test/multiqc/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Config yml for MultiQC - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Logo file for MultiQC - pattern: "*.{png}" -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" diff --git a/modules/yaml-schema.json b/modules/yaml-schema.json index 24516d54..fcb05aef 100644 --- a/modules/yaml-schema.json +++ b/modules/yaml-schema.json @@ -1,146 +1,160 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "title": "Meta yaml", - "description": "Validate the meta yaml file for an nf-core module", - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Name of the module" - }, - "description": { - "type": "string", - "description": "Description of the module" - }, - "keywords": { - "type": "array", - "description": "Keywords for the module", - "items": { - "type": "string" - }, - "minItems": 3 - }, - "authors": { - "type": "array", - "description": "Authors of the module", - "items": { - "type": "string" - } - }, - "input": { - "type": "array", - "description": "Input channels for the module", + "$schema": "http://json-schema.org/draft-07/schema", + "title": "Meta yaml", + "description": "Validate the meta yaml file for an nf-core module", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the module" + }, + "description": { + "type": "string", + "description": "Description of the module" + }, + "keywords": { + "type": "array", + "description": "Keywords for the module", + "items": { + "type": "string" + }, + "minItems": 3 + }, + "authors": { + "type": "array", + "description": "Authors of the module", + "items": { + "type": "string" + } + }, + "input": { + "type": "array", + "description": "Input channels for the module", - "items": { - "type": "object", - "patternProperties": { - ".*": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of the input channel", - "enum": ["map", "file", "directory", "string", "integer", "float"] - }, - "description": { - "type": "string", - "description": "Description of the input channel" - }, - "pattern": { - "type": "string", - "description": "Pattern of the input channel, given in Java glob syntax" - }, - "default": { - "type": ["string", "number", "boolean", "array", "object"], - "description": "Default value for the input channel" - } - }, - "required": ["type", "description"] - } - } - } - }, - "output": { - "type": "array", - "description": "Output channels for the module", - "items": { - "type": "object", - "patternProperties": { - ".*": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "Type of the output channel", - "enum": ["map", "file", "directory", "string", "integer", "float"] - }, - "description": { - "type": "string", - "description": "Description of the output channel" - }, - "pattern": { - "type": "string", - "description": "Pattern of the input channel, given in Java glob syntax" - } - }, - "required": ["type", "description"] - } - } - } - }, - "tools": { - "type": "array", - "description": "Tools used by the module", - "items": { - "type": "object", - "patternProperties": { - ".*": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the output channel" - }, - "homepage": { - "type": "string", - "description": "Homepage of the tool", - "pattern": "^(http|https)://.*$" - }, - "documentation": { - "type": "string", - "description": "Documentation of the tool", - "pattern": "^(http|https)://.*$" - }, - "tool_dev_url": { - "type": "string", - "description": "URL of the development version of the tool's documentation", - "pattern": "^(http|https)://.*$" - }, - "doi": { - "type": "string", - "description": "DOI of the tool", - "pattern": "^10.\\d{4,9}\\/[^,]+$" - }, - "licence": { - "type": ["array", "string"], - "description": "Licence of the tool", - "items": { - "type": "string" - }, - "uniqueItems": true - } - }, - "required": ["description"], - "anyOf": [ - { "required": ["homepage"] }, - { "required": ["documentation"] }, - { "required": ["tool_dev_url"] }, - { "required": ["doi"] } - ] - } - } - } + "items": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of the input channel", + "enum": [ + "map", + "file", + "directory", + "string", + "integer", + "float" + ] + }, + "description": { + "type": "string", + "description": "Description of the input channel" + }, + "pattern": { + "type": "string", + "description": "Pattern of the input channel, given in Java glob syntax" + }, + "default": { + "type": ["string", "number", "boolean", "array", "object"], + "description": "Default value for the input channel" + } + }, + "required": ["type", "description"] + } } + } }, - "required": ["name", "description", "keywords", "authors", "output", "tools"] -} \ No newline at end of file + "output": { + "type": "array", + "description": "Output channels for the module", + "items": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of the output channel", + "enum": [ + "map", + "file", + "directory", + "string", + "integer", + "float" + ] + }, + "description": { + "type": "string", + "description": "Description of the output channel" + }, + "pattern": { + "type": "string", + "description": "Pattern of the input channel, given in Java glob syntax" + } + }, + "required": ["type", "description"] + } + } + } + }, + "tools": { + "type": "array", + "description": "Tools used by the module", + "items": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "Description of the output channel" + }, + "homepage": { + "type": "string", + "description": "Homepage of the tool", + "pattern": "^(http|https)://.*$" + }, + "documentation": { + "type": "string", + "description": "Documentation of the tool", + "pattern": "^(http|https)://.*$" + }, + "tool_dev_url": { + "type": "string", + "description": "URL of the development version of the tool's documentation", + "pattern": "^(http|https)://.*$" + }, + "doi": { + "type": "string", + "description": "DOI of the tool", + "pattern": "^10.\\d{4,9}\\/[^,]+$" + }, + "licence": { + "type": ["array", "string"], + "description": "Licence of the tool", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }, + "required": ["description"], + "anyOf": [ + { "required": ["homepage"] }, + { "required": ["documentation"] }, + { "required": ["tool_dev_url"] }, + { "required": ["doi"] } + ] + } + } + } + } + }, + "required": ["name", "description", "keywords", "authors", "output", "tools"] +} diff --git a/subworkflows/mskcc/bwa_markdup_bqsr/main.nf b/subworkflows/mskcc/bwa_markdup_bqsr/main.nf new file mode 100755 index 00000000..d8b6c330 --- /dev/null +++ b/subworkflows/mskcc/bwa_markdup_bqsr/main.nf @@ -0,0 +1,107 @@ +include { BWA_MEM } from '../../../modules/mskcc/bwa/mem/main' +include { GATK4_MARKDUPLICATES } from '../../../modules/mskcc/gatk4/markduplicates/main' +include { GATK4_MARKDUPLICATES_SPARK } from '../../../modules/mskcc/gatk4/markduplicatesspark/main' +include { GATK4_APPLYBQSR } from '../../../modules/mskcc/gatk4/applybqsr/main' +include { GATK4_APPLYBQSR_SPARK } from '../../../modules/mskcc/gatk4/applybqsrspark/main' +include { GATK4_BASERECALIBRATOR } from '../../../modules/mskcc/gatk4/baserecalibrator/main' +include { GATK4_BASERECALIBRATOR_SPARK } from '../../../modules/mskcc/gatk4/baserecalibratorspark/main' +include { SAMTOOLS_INDEX } from '../../../modules/mskcc/samtools/index/main' + + +workflow BWA_MARKDUP_BQSR { + take: + reads + fasta + fai + bwa_index + dict + known_sites + known_sites_tbi + spark // true, false + + main: + + ch_versions = Channel.empty() + + // run once for each pair of reads + BWA_MEM( + reads, + bwa_index, + true + ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + grouped_bam_ch = + BWA_MEM.out.bam + .map{meta, bam -> + def new_meta = meta - meta.subMap(['read_group']) + [ new_meta, bam ] + }.groupTuple(by:0) + + if (spark){ + GATK4_MARKDUPLICATES_SPARK( + grouped_bam_ch, + fasta, + fai + ) + GATK4_BASERECALIBRATOR_SPARK( + GATK4_MARKDUPLICATES_SPARK.out.output, + fasta, + fai, + dict, + known_sites, + known_sites_tbi + ) + GATK4_APPLYBQSR_SPARK( + GATK4_MARKDUPLICATES_SPARK.out.output + .join(GATK4_MARKDUPLICATES_SPARK.out.bam_index) + .join(GATK4_BASERECALIBRATOR_SPARK.out.table) + .map{meta, bam, bai, table -> + [ meta, bam, bai, table, [] ] // what is intervals? + }, + fasta, + fai, + dict + ) + bqsr_bam = GATK4_APPLYBQSR_SPARK.out.bam + + } else { + GATK4_MARKDUPLICATES( + grouped_bam_ch, + fasta, + fai + ) + GATK4_BASERECALIBRATOR( + GATK4_MARKDUPLICATES.out.bam, + fasta, + fai, + dict, + known_sites, + known_sites_tbi + ) + GATK4_APPLYBQSR( + GATK4_MARKDUPLICATES.out.bam + .join(GATK4_MARKDUPLICATES.out.bai) + .join(GATK4_BASERECALIBRATOR.out.table) + .map{meta, bam, bai, table -> + [ meta, bam, bai, table, [] ] // what is intervals? + }, + fasta, + fai, + dict + ) + bqsr_bam = GATK4_APPLYBQSR.out.bam + // bqsr_bai = GATK4_APPLYBQSR.out.bai + } + + SAMTOOLS_INDEX(bqsr_bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + bqsr_bai = SAMTOOLS_INDEX.out.bai + + emit: + bam = bqsr_bam + bai = bqsr_bai + versions = ch_versions + +} diff --git a/subworkflows/mskcc/bwa_markdup_bqsr/meta.yml b/subworkflows/mskcc/bwa_markdup_bqsr/meta.yml new file mode 100755 index 00000000..67f28c24 --- /dev/null +++ b/subworkflows/mskcc/bwa_markdup_bqsr/meta.yml @@ -0,0 +1,76 @@ +name: bam_markdup_bqsr +description: A subworkflow for generating a BAM file from FASTQ +keywords: + - bam + - alignment + - markduplicates + - bqsr + - duplicates + +components: + - bwa/mem + - gatk4/markduplicates + - gatk4/applybqsr + - gatk4/baserecalibrator + - samtools/index + - gatk4/markduplicatesspark + - gatk4/applybqsrspark + - gatk4/baserecalibratorspark + +input: + - reads: + description: | + An input channel containing fastq.gz files + Structure: [ val(meta), path(reads)] + pattern: "*.{fastq.gz}" + - fasta: + description: | + A channel containing the reference FASTA file + Structure: [ path(fasta) ] + pattern: "*.{fasta,fa}" + - fai: + description: | + A channel containing the index of the reference FASTA file + Structure: [ path(fai) ] + pattern: "*.{fai}" + - bwa_index: + description: | + A channel containing bwa index reference, which can be created using bwa/index + Structure: [ val(meta2), path(bwa_index) ] + pattern: "*.{amb,ann,bwt,pac,sa}" + - dict: + description: | + A channel containing a sequence dictionary file (`dict`), which can be created using gatk4/createsequencedictionary + Structure: [ path(blacklist) ] + pattern: "*.{dict}" + - known_sites: + description: | + A channel containing one or more files containing known polymorphic sites that should be excluded during base recalibration. + Structure: [ path([known_sites_1, known_sites_2]) ] + pattern: "*.vcf.gz" + - known_sites_tbi: + description: | + A channel containing tabix index files of the known_sites files + Structure: [ path([known_sites_1_tbi, known_sites_2_tbi]) ] + pattern: "*.vcf.gz.tbi" + - spark: + type: boolean + description: | + If true use spark gatk4 modules: GATK4_MARKDUPLICATES_SPARK, GATK4_APPLYBQSR_SPARK, GATK4_BASERECALIBRATOR_SPARK + pattern: "true|false" + +output: + - bam: + description: | + BAM file with marked duplicates and BQSR + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM index file + Structure: [ val(meta), path(bai) ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@anoronh4" diff --git a/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config b/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config new file mode 100755 index 00000000..e18f96ae --- /dev/null +++ b/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: '.*:BWA_MARKDUP_BQSR:BWA_MEM' { + ext.args = "-R "\${meta.read_group}" + ext.prefix = { "${meta.id}@${meta.read_group}" } + } + + withName: '.*:BWA_MARKDUP_BQSR:GATK4_MARKDUPLICATES.*' { + ext.args = { + [ + "--ASSUME_SORT_ORDER coordinate", + "--CREATE_INDEX true" + ].join(" ") + } + } + + withName: '.*:BWA_MARKDUP_BQSR:GATK4_BASERECALIBRATOR.*' { + ext.args = "--verbosity INFO" + } + + withName: '.*:BWA_MARKDUP_BQSR:GATK4_APPLYBQSR' { + ext.args = "--create-output-bam-index true --create-output-bam-index true" + } + + withName: '.*:BWA_MARKDUP_BQSR:GATK4_APPLYBQSR_SPARK' { + ext.args = "--create-output-bam-index true" + } +} \ No newline at end of file diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config new file mode 100644 index 00000000..104f1d9e --- /dev/null +++ b/tests/config/nextflow.config @@ -0,0 +1,44 @@ +params { + outdir = "output/" + publish_dir_mode = "copy" + singularity_pull_docker_container = false + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' +} + +process { + cpus = 2 + memory = 3.GB + time = 2.h +} + +if ("$PROFILE" == "singularity") { + singularity.enabled = true + singularity.autoMounts = true +} else if ("$PROFILE" == "conda") { + conda.enabled = true +} else if ("$PROFILE" == "mamba") { + conda.enabled = true + conda.useMamba = true +} else if ("$PROFILE" == "podman") { + podman.enabled = true + podman.userEmulation = true + podman.runOptions = "--runtime crun --platform linux/x86_64 --systemd=always" +} else { + docker.enabled = true + docker.userEmulation = true + docker.runOptions = "--platform linux/x86_64" +} + +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Increase time available to build Conda environment +conda { createTimeout = "120 min" } + +// Load test_data.config containing paths to test data +includeConfig 'test_data.config' + +manifest { + nextflowVersion = '!>=23.04.0' +} diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml new file mode 100644 index 00000000..99543f96 --- /dev/null +++ b/tests/config/pytest_modules.yml @@ -0,0 +1,43 @@ +bwa/mem: + - modules/mskcc/bwa/mem/** + - tests/modules/mskcc/bwa/mem/** + +gatk4/applybqsr: + - modules/mskcc/gatk4/applybqsr/** + - tests/modules/mskcc/gatk4/applybqsr/** + +gatk4/applybqsrspark: + - modules/mskcc/gatk4/applybqsrspark/** + - tests/modules/mskcc/gatk4/applybqsrspark/** + +gatk4/baserecalibrator: + - modules/mskcc/gatk4/baserecalibrator/** + - tests/modules/mskcc/gatk4/baserecalibrator/** + +gatk4/baserecalibratorspark: + - modules/mskcc/gatk4/baserecalibratorspark/** + - tests/modules/mskcc/gatk4/baserecalibratorspark/** + +gatk4/createsequencedictionary: + - modules/mskcc/gatk4/createsequencedictionary/** + - tests/modules/mskcc/gatk4/createsequencedictionary/** + +gatk4/markduplicates: + - modules/mskcc/gatk4/markduplicates/** + - tests/modules/mskcc/gatk4/markduplicates/** + +gatk4/markduplicatesspark: + - modules/mskcc/gatk4/markduplicatesspark/** + - tests/modules/mskcc/gatk4/markduplicatesspark/** + +samtools/index: + - modules/mskcc/samtools/index/** + - tests/modules/mskcc/samtools/index/** + +samtools/faidx: + - modules/mskcc/samtools/faidx/** + - tests/modules/mskcc/samtools/faidx/** + +subworkflows/bwa_markdup_bqsr: + - subworkflows/mskcc/bwa_markdup_bqsr/** + - tests/subworkflows/mskcc/bwa_markdup_bqsr/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config new file mode 100644 index 00000000..1c9ed340 --- /dev/null +++ b/tests/config/test_data.config @@ -0,0 +1,713 @@ +// README: +// https://github.com/nf-core/test-datasets/blob/modules/README.md + +params { + // Base directory for test data + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules" + + test_data { + 'sarscov2' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta" + genome_fasta_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.gz" + genome_fasta_fai = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.fai" + genome_fasta_txt_zst = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.txt.zst" + genome_dict = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3" + genome_gff3_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3.gz" + genome_gtf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gtf" + genome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.paf" + genome_sizes = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.sizes" + transcriptome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.fasta" + proteome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/proteome.fasta" + transcriptome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.paf" + + test_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed" + test_bed_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed.gz" + test2_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test2.bed" + test_bed12 = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed12" + baits_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/baits.bed" + bed_autosql = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/bed6alt.as" + + reference_cnn = "${params.test_data_base}/data/genomics/sarscov2/genome/cnn/reference.cnn" + + kraken2 = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2" + kraken2_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2.tar.gz" + + kraken2_bracken = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken" + kraken2_bracken_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz" + + kaiju = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju" + kaiju_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju.tar.gz" + + kofamscan_profiles_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/profiles.tar.gz" + kofamscan_ko_list_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/ko_list.gz" + + ncbi_taxmap_zip = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip" + taxon_list_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/taxon_list.txt" + + mmseqs_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/mmseqs.tar.gz" + + all_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/all_sites.fas" + informative_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/informative_sites.fas" + + contigs_genome_maf_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz" + contigs_genome_par = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.par" + lastdb_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz" + + baits_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/baits.interval_list" + targets_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/targets.interval_list" + regions_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/graphtyper/regions.txt" + } + 'illumina' { + test_single_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.bam" + test_single_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam" + test_single_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai" + test_paired_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.bam" + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_methylated_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.bam" + test_paired_end_methylated_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam" + test_paired_end_methylated_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai" + test_unaligned_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.unaligned.bam" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz" + test_interleaved_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz" + test_1_fastq_txt_zst = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.txt.zst" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz" + test_methylated_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_1.fastq.gz" + test_methylated_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_2.fastq.gz" + + test_bedgraph = "${params.test_data_base}/data/genomics/sarscov2/illumina/bedgraph/test.bedgraph" + + test_bigwig = "${params.test_data_base}/data/genomics/sarscov2/illumina/bigwig/test.bigwig" + + test_wig_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/wig/test.wig.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/sarscov2/illumina/gatk/test.baserecalibrator.table" + + test_computematrix_mat_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/deeptools/test.computeMatrix.mat.gz" + + test_bcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.bcf" + + test_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf" + test_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz" + test_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi" + test2_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf" + test2_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz" + test2_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" + test2_vcf_targets_tsv_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz" + test3_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf" + test3_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz" + test3_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi" + + contigs_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/contigs.fasta" + scaffolds_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/scaffolds.fasta" + + assembly_gfa = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa" + assembly_gfa_bgz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.bgz" + assembly_gfa_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.gz" + assembly_gfa_zst = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.zst" + + test_single_end_bam_readlist_txt = "${params.test_data_base}/data/genomics/sarscov2/illumina/picard/test.single_end.bam.readlist.txt" + + SRR13255544_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR13255544.tar.gz" + SRR11140744_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR11140744.tar.gz" + } + 'nanopore' { + test_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam" + test_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam.bai" + + fast5_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fast5/fast5.tar.gz" + + test_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test.fastq.gz" + + test_sequencing_summary = "${params.test_data_base}/data/genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt" + } + 'metagenome' { + classified_reads_assignment = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.reads.txt" + kraken_report = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.report.txt" + krona_taxonomy = "${params.test_data_base}/data/genomics/sarscov2/metagenome/krona_taxonomy.tab" + seqid2taxid_map = "${params.test_data_base}/data/genomics/sarscov2/metagenome/seqid2taxid.map" + nodes_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/nodes.dmp" + names_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/names.dmp" + } + } + 'mus_musculus' { + 'genome' { + rnaseq_samplesheet = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv" + rnaseq_genemeta = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv" + rnaseq_contrasts = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv" + rnaseq_matrix = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv" + deseq_results = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376999.small.fastq.gz" + } + 'csv' { + count_table = "${params.test_data_base}/data/genomics/mus_musculus/mageck/count_table.csv" + library = "${params.test_data_base}/data/genomics/mus_musculus/mageck/yusa_library.csv" + } + 'txt' { + design_matrix = "${params.test_data_base}/data/genomics/mus_musculus/mageck/design_matrix.txt" + } + } + 'homo_sapiens' { + '10xgenomics' { + cellranger { + test_10x_10k_pbmc_5fb_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5fb_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_feature_ref.csv" + + test_10x_10k_pbmc_cmo_cmo_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_cmo_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/10k_pbmc_cmo_count_feature_reference.csv" + + test_10x_5k_cmvpos_tcells_ab_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_ab_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_vdj_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_vdj_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/5k_human_antiCMV_T_TBNK_connect_Multiplex_count_feature_reference.csv" + + test_10x_vdj_ref_json = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/reference.json" + test_10x_vdj_ref_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/regions.fa" + test_10x_vdj_ref_suppfasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/supp_regions.fa" + } + spaceranger { + test_10x_ffpe_cytassist_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_cytassist_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_cytassist_image = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_image.tif" + test_10x_ffpe_cytassist_probeset = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_probe_set.csv" + + test_10x_ffpe_v1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_v1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_v1_image = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" + } + } + 'genome' { + genome_elfasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elfasta" + genome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta" + genome_fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_fasta_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz" + genome_fasta_gz_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.fai" + genome_fasta_gz_gzi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.gzi" + genome_strtablefile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_strtablefile.zip" + genome_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gff3" + genome_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gtf" + genome_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.interval_list" + genome_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + genome_blacklist_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed" + genome_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.sizes" + genome_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed" + genome_header = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.header" + genome_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz" + genome_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz.tbi" + genome_elsites = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elsites" + transcriptome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/transcriptome.fasta" + genome2_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome2.fasta" + genome_chain_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.chain.gz" + genome_annotated_interval_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.annotated_intervals.tsv" + genome_mt_gb = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.NC_012920_1.gb" + genome_preprocessed_count_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.counts.tsv" + genome_preprocessed_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.interval_list" + genome_ploidy_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_model.tar.gz" + genome_ploidy_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_calls.tar.gz" + genome_germline_cnv_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_model.tar.gz" + genome_germline_cnv_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_calls.tar.gz" + genome_21_sdf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome_sdf.tar.gz" + genome_21_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + genome_21_fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + genome_21_gencode_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr21_gencode.gtf" + genome_21_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + genome_21_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.sizes" + genome_21_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list" + genome_21_annotated_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/annotated.bed" + genome_21_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + genome_21_multi_interval_antitarget_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.antitarget.bed" + genome_21_multi_interval_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" + genome_21_multi_interval_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" + genome_21_chromosomes_dir = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" + genome_21_eigenstrat_snp = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr_21.snp" + genome_21_stitch_posfile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/dbsnp_138.hg38.first_10_biallelic_sites.tsv" + + dbsnp_146_hg38_elsites = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" + dbsnp_146_hg38_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + dbsnp_146_hg38_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + gnomad_r2_1_1_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + mills_and_1000g_indels_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" + syntheticvcf_short_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz" + syntheticvcf_short_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi" + syntheticvcf_short_score = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score" + gnomad_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz" + gnomad2_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD2.r2.1.1-sv.vcf.gz" + + hapmap_3_3_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz" + hapmap_3_3_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi" + res_1000g_omni2_5_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz" + res_1000g_omni2_5_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi" + res_1000g_phase1_snps_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz" + res_1000g_phase1_snps_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi" + dbsnp_138_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + dbsnp_138_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi" + gnomad_r2_1_1_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + mills_and_1000g_indels_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi" + haplotype_map = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/haplotype_map.txt" + dbNSFP_4_1a_21_hg38_txt_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz" + dbNSFP_4_1a_21_hg38_txt_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz.tbi" + + index_salmon = "${params.test_data_base}/data/genomics/homo_sapiens/genome/index/salmon" + repeat_expansions = "${params.test_data_base}/data/genomics/homo_sapiens/genome/loci/repeat_expansions.json" + justhusky_ped = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped" + justhusky_minimal_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz" + justhusky_minimal_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi" + + vcfanno_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz" + vcfanno_toml = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" + updsites_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/updsites.bed" + + prg_input = "${params.test_data_base}/data/genomics/homo_sapiens/genome/PRG_test.zip" + crispr_functional_counts = "${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/functional_genomics_counts.tsv" + crispr_functional_library = "${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/library_functional_genomics.tsv" + + vep_cache = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vep.tar.gz" + affy_array_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751.csv" + affy_array_celfiles_tar = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751_RAW.tar" + + } + 'pangenome' { + pangenome_fa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa" + pangenome_fa_bgzip = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz" + pangenome_fa_bgzip_fai = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.fai" + pangenome_fa_bgzip_gzi = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.gzi" + pangenome_paf = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf" + pangenome_paf_gz = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf.gz" + pangenome_seqwish_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.seqwish.gfa" + pangenome_smoothxg_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.smoothxg.gfa" + pangenome_gfaffix_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.gfaffix.gfa" + 'odgi' { + pangenome_og = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.og" + pangenome_lay = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.lay" + } + } + 'illumina' { + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_name_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam" + test_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam" + test_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai" + test_paired_end_markduplicates_sorted_referencesn_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt" + test_paired_end_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam" + test_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai" + test_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam" + test_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam" + test_paired_end_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam" + test_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt" + test_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam" + test_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam" + test_paired_end_hla = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.bam" + test_paired_end_hla_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam" + test_paired_end_hla_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam.bai" + test_rna_paired_end_sorted_chr6_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam" + test_rna_paired_end_sorted_chr6_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam.bai" + + test2_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai" + test2_paired_end_name_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam" + test2_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam" + test2_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai" + test2_paired_end_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam" + test2_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai" + test2_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam" + test2_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam" + test2_paired_end_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam" + test2_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt" + test2_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" + test2_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" + test_paired_end_duplex_umi_unmapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_unmapped.bam" + test_paired_end_duplex_umi_mapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped.bam" + test_paired_end_duplex_umi_mapped_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped_tagged.bam" + test_paired_end_duplex_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam" + test_paired_end_duplex_umi_duplex_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_duplex_consensus.bam" + + mitochon_standin_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" + mitochon_standin_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" + test_illumina_mt_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam" + test_illumina_mt_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam.bai" + + test3_single_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam" + + read_group_settings_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/read_group_settings.txt" + + test_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" + test_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" + test_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" + test_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai" + test_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram" + test_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai" + + test2_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram" + test2_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai" + test2_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram" + test2_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai" + test2_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram" + test2_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai" + test3_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram" + test3_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + test_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz" + test_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" + test2_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" + test2_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" + test_rnaseq_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz" + test_rnaseq_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz" + test_paired_end_duplex_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_1.fastq.gz" + test_paired_end_duplex_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_2.fastq.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" + test2_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" + test_pileups_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.pileups.table" + test2_pileups_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + + test_paired_end_sorted_dragstrmodel = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + + test_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" + test_pon_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" + + test2_haplotc_ann_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" + test2_haplotc_ann_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test_haplotc_cnn_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz" + test_haplotc_cnn_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi" + + test2_haplotc_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" + test2_haplotc_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" + + test2_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" + test2_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" + test2_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" + test2_allele_specific_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal" + test2_allele_specific_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx" + test2_allele_specific_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches" + + test_test2_paired_mutect2_calls_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" + test_test2_paired_mutect2_calls_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" + test_test2_paired_mutect2_calls_vcf_gz_stats = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.stats" + test_test2_paired_mutect2_calls_f1r2_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.f1r2.tar.gz" + test_test2_paired_mutect2_calls_artifact_prior_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired_mutect2_calls.artifact-prior.tar.gz" + test_test2_paired_segmentation_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.segmentation.table" + test_test2_paired_contamination_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.contamination.table" + + test_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf" + test_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz" + test_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi" + test_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx" + + test_genome_vcf_ud = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.UD" + test_genome_vcf_mu = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.mu" + test_genome_vcf_bed = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.bed" + + test2_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf" + test2_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz" + test2_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" + test2_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" + + test_genome21_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz" + test_genome21_indels_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi" + + test_mpileup = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz" + test2_mpileup = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz" + + test_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" + test2_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" + + test_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" + test2_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" + + test_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test.yak" + test2_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test2.yak" + + cutandrun_bedgraph_test_1 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" + cutandrun_bedgraph_test_2 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" + + empty_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz" + empty_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz.tbi" + + test_rnaseq_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf" + test_sv_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" + test_sv_vcf_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz.tbi" + genmod_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/genmod.vcf.gz" + genmod_annotate_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_annotate.vcf.gz" + genmod_models_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz" + genmod_score_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_score.vcf.gz" + + test_mito_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz" + + test_pytor = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/pytor/test.pytor" + rank_model = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/genmod/svrank_model_-v1.8-.ini" + + test_flowcell = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" + test_flowcell_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv" + + varlociraptor_scenario = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml" + + contig_ploidy_priors_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + + purecn_ex1_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam" + purecn_ex1_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam.bai" + purecn_ex1_interval = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_intervals.txt" + purecn_ex1_normal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_normal.txt.gz" + purecn_ex2_normal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex2_normal.txt.gz" + purecn_normalpanel_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz" + purecn_normalpanel_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz.tbi" + } + 'pacbio' { + primers = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/primers.fasta" + alz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam" + alzpbi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam.pbi" + ccs = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam" + ccs_fa = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta" + ccs_fa_gz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz" + ccs_fq = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq" + ccs_fq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz" + ccs_xml = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/xml/alz.ccs.consensusreadset.xml" + hifi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz" + lima = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.bam" + refine = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.bam" + cluster = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.bam" + singletons = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.bam" + aligned = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam" + alignedbai = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam.bai" + genemodel1 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed" + genemodel2 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed" + filelist = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/txt/filelist.txt" + } + 'scramble' { + fasta = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa" + fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa.fai" + bam = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam" + bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam.bai" + cram = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram" + cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram.crai" + bed = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bed" + } + 'gene_set_analysis' { + gct = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples_collapsed_symbols.gct" + cls = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples.cls" + gmx = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/c1.symbols.reduced.gmx" + } + 'cnvkit' { + amplicon_cnr = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cnr" + amplicon_cns = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cns" + } + } + 'bacteroides_fragilis' { + 'genome' { + genome_fna_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz" + genome_gbff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz" + genome_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.paf" + genome_gff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gff.gz" + + } + 'hamronization' { + genome_abricate_tsv = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv" + genome_mapping_potential_arg = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG" + } + 'illumina' { + test1_contigs_fa_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz" + test1_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz" + test1_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_2.fastq.gz" + test1_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.bam" + test1_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam" + test1_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam.bai" + test2_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.bam" + test2_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam.bai" + } + 'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz" + overlap_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/overlap.paf" + } + } + 'candidatus_portiera_aleyrodidarum' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta" + genome_sizes = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.sizes" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.nwk" + proteome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta" + test1_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff" + test2_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff" + test3_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_2.fastq.gz" + test_se_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_se.fastq.gz" + } + 'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/nanopore/fastq/test.fastq.gz" + } + } + 'haemophilus_influenzae' { + 'genome' { + genome_fna_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.nwk" + } + } + 'generic' { + 'csv' { + test_csv = "${params.test_data_base}/data/generic/csv/test.csv" + } + 'notebooks' { + rmarkdown = "${params.test_data_base}/data/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd" + ipython_md = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.md" + ipython_ipynb = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.ipynb" + } + 'tar' { + tar_gz = "${params.test_data_base}/data/generic/tar/hello.tar.gz" + } + 'tsv' { + test_tsv = "${params.test_data_base}/data/generic/tsv/test.tsv" + } + 'txt' { + hello = "${params.test_data_base}/data/generic/txt/hello.txt" + } + 'cooler'{ + test_pairix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" + test_pairix_pair_gz_px2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" + test_pairs_pair = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs" + test_tabix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz" + test_tabix_pair_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi" + hg19_chrom_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes" + test_merge_cool = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool" + test_merge_cool_cp2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cp2.cool" + + } + 'pairtools' { + mock_4dedup_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4dedup.pairsam" + mock_4flip_pairs = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4flip.pairs" + mock_chrom_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.chrom.sizes" + mock_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.pairsam" + mock_sam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.sam" + frag_bed = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/frag.bed" + } + 'config' { + ncbi_user_settings = "${params.test_data_base}/data/generic/config/ncbi_user_settings.mkfg" + } + 'unsorted_data' { + 'unsorted_text' { + genome_file = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.genome" + intervals = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.bed" + numbers_csv = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.csv" + } + } + } + 'proteomics' { + 'msspectra' { + ups_file1 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_12.raw" + ups_file2 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_14.raw" + } + 'database' { + yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta" + } + 'maxquant' { + mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv" + mq_proteingroups = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt" + mq_samplesheet = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv" + mq_proteus_mat = "${params.test_data_base}/data/proteomics/maxquant/proteus.raw_MaxQuant_proteingroups_tab.tsv" + } + 'parameter' { + maxquant = "${params.test_data_base}/data/proteomics/parameter/mqpar.xml" + } + 'idfile' { + openms_idxml = "${params.test_data_base}/data/proteomics/openms_idxml/BSA_QC_file.idXML" + } + } + 'galaxea_fascicularis' { + hic { + pretext = "${params.test_data_base}/data/genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext" + } + } + 'deilephila_porcellus' { + 'mito' { + ref_fa = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.fasta" + ref_gb = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.gb" + hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa" + contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa" + } + } + 'imaging' { + 'h5' { + plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5" + plant_wga_prob = "${params.test_data_base}/data/imaging/h5/plant_wga_probabilities.h5" + } + 'ilp' { + plant_wga_multicut = "${params.test_data_base}/data/imaging/ilp/plant_wga.multicut.ilp" + plant_wga_pixel_class = "${params.test_data_base}/data/imaging/ilp/plant_wga.pixel_prob.ilp" + } + 'tiff' { + mouse_heart_wga = "${params.test_data_base}/data/imaging/tiff/mindagap.mouse_heart.wga.tiff" + } + 'ome-tiff' { + cycif_tonsil_channels = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-channels.csv" + cycif_tonsil_cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycif_tonsil_cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + cycif_tonsil_cycle3 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + cycif_tonsil_dfp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-dfp.ome.tif" + cycif_tonsil_ffp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-ffp.ome.tif" + } + 'registration' { + markers = "${params.test_data_base}/data/imaging/registration/markers.csv" + cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + } + 'segmentation' { + markers = "${params.test_data_base}/data/imaging/segmentation/markers.csv" + image = "${params.test_data_base}/data/imaging/segmentation/cycif_tonsil_registered.ome.tif" + } + 'quantification' { + markers = "${params.test_data_base}/data/imaging/quantification/markers.csv" + image = "${params.test_data_base}/data/imaging/quantification/cycif_tonsil_registered.ome.tif" + mask = "${params.test_data_base}/data/imaging/quantification/cell.ome.tif" + } + 'downstream' { + markers = "${params.test_data_base}/data/imaging/downstream/markers.csv" + cell_feature_array = "${params.test_data_base}/data/imaging/downstream/cycif_tonsil_cell.csv" + } + 'background_subtraction' { + markers = "${params.test_data_base}/data/imaging/background_subtraction/markers.csv" + image = "${params.test_data_base}/data/imaging/background_subtraction/cycif_tonsil_registered.ome.tif" + } + 'core_detection' { + image = "${params.test_data_base}/data/imaging/core_detection/single_core_dapi.tif" + } + } + } +} diff --git a/tests/modules/mskcc/bwa/index/main.nf b/tests/modules/mskcc/bwa/index/main.nf new file mode 100644 index 00000000..1f0bf081 --- /dev/null +++ b/tests/modules/mskcc/bwa/index/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BWA_INDEX } from '../../../../../modules/nf-core/bwa/index/main.nf' + +workflow test_bwa_index { + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) +} diff --git a/tests/modules/mskcc/bwa/index/nextflow.config b/tests/modules/mskcc/bwa/index/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/bwa/index/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/bwa/index/test.yml b/tests/modules/mskcc/bwa/index/test.yml new file mode 100644 index 00000000..f561f63b --- /dev/null +++ b/tests/modules/mskcc/bwa/index/test.yml @@ -0,0 +1,30 @@ +- name: bwa index test_bwa_index + command: nextflow run ./tests/modules/nf-core/bwa/index -entry test_bwa_index -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bwa/index/nextflow.config + tags: + - bwa + - bwa/index + files: + - path: output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + - path: output/bwa/versions.yml + +- name: bwa index test_bwa_index_stub + command: nextflow run ./tests/modules/nf-core/bwa/index -entry test_bwa_index -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bwa/index/nextflow.config -stub + tags: + - bwa + - bwa/index + files: + - path: output/bwa/bwa/genome.amb + - path: output/bwa/bwa/genome.ann + - path: output/bwa/bwa/genome.bwt + - path: output/bwa/bwa/genome.pac + - path: output/bwa/bwa/genome.sa + - path: output/bwa/versions.yml diff --git a/tests/modules/mskcc/bwa/mem/main.nf b/tests/modules/mskcc/bwa/mem/main.nf new file mode 100644 index 00000000..7ca09536 --- /dev/null +++ b/tests/modules/mskcc/bwa/mem/main.nf @@ -0,0 +1,125 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BWA_INDEX } from '../../../../../modules/mskcc/bwa/index/main.nf' +include { BWA_MEM } from '../../../../../modules/mskcc/bwa/mem/main.nf' + +// +// Test with single-end data +// +workflow test_bwa_mem_single_end { + input = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} + +// +// Test with single-end data and sort +// +workflow test_bwa_mem_single_end_sort { + input = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, true ) +} + +// +// Test with paired-end data +// +workflow test_bwa_mem_paired_end { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} + +// +// Test with paired-end data and sort +// +workflow test_bwa_mem_paired_end_sort { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, true ) +} + + +// +// Test with stub pairedend +// +workflow test_bwa_mem_paired_end_stub { + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} + + +// +// Test with single-end data stub +// +workflow test_bwa_mem_single_end_stub { + input = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + fasta = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + BWA_INDEX ( fasta ) + BWA_MEM ( input, BWA_INDEX.out.index, false ) +} diff --git a/tests/modules/mskcc/bwa/mem/nextflow.config b/tests/modules/mskcc/bwa/mem/nextflow.config new file mode 100644 index 00000000..d15f6939 --- /dev/null +++ b/tests/modules/mskcc/bwa/mem/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: BWA_MEM { + ext.args2 = { sort_bam ? "" : "-bh" } + } + +} diff --git a/tests/modules/mskcc/bwa/mem/test.yml b/tests/modules/mskcc/bwa/mem/test.yml new file mode 100644 index 00000000..b83b183b --- /dev/null +++ b/tests/modules/mskcc/bwa/mem/test.yml @@ -0,0 +1,87 @@ +- name: bwa mem single-end + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_single_end -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + +- name: bwa mem single-end sort + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_single_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + +- name: bwa mem paired-end + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + +- name: bwa mem paired-end sort + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + - path: ./output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: ./output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: ./output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: ./output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: ./output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + +- name: bwa mem paired-end stub + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_paired_end_sort -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config -stub + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam + +- name: bwa mem single-end stub + command: nextflow run ./tests/modules/mskcc/bwa/mem -entry test_bwa_mem_single_end_stub -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/bwa/mem/nextflow.config -stub + tags: + - bwa + - bwa/mem + files: + - path: ./output/bwa/test.bam diff --git a/tests/modules/mskcc/gatk4/applybqsr/main.nf b/tests/modules/mskcc/gatk4/applybqsr/main.nf new file mode 100644 index 00000000..d92b8c1f --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsr/main.nf @@ -0,0 +1,47 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_APPLYBQSR } from '../../../../../modules/mskcc/gatk4/applybqsr/main.nf' + +workflow test_gatk4_applybqsr { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR ( input, fasta, fai, dict ) +} diff --git a/tests/modules/mskcc/gatk4/applybqsr/nextflow.config b/tests/modules/mskcc/gatk4/applybqsr/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsr/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/gatk4/applybqsr/test.yml b/tests/modules/mskcc/gatk4/applybqsr/test.yml new file mode 100644 index 00000000..63156f65 --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsr/test.yml @@ -0,0 +1,28 @@ +- name: gatk4 applybqsr test_gatk4_applybqsr + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsr -entry test_gatk4_applybqsr -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsr/nextflow.config + tags: + - gatk4 + - gatk4/applybqsr + files: + - path: output/gatk4/test.bam + md5sum: e025e4351c5a3ff426bb5521cb353e0f + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsr test_gatk4_applybqsr_intervals + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsr/nextflow.config + tags: + - gatk4 + - gatk4/applybqsr + files: + - path: output/gatk4/test.bam + md5sum: 13f8358467c9cda8a95194a1498ce605 + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsr test_gatk4_applybqsr_cram + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsr/nextflow.config + tags: + - gatk4 + - gatk4/applybqsr + files: + - path: output/gatk4/test.cram + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/applybqsrspark/main.nf b/tests/modules/mskcc/gatk4/applybqsrspark/main.nf new file mode 100644 index 00000000..6e50454f --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsrspark/main.nf @@ -0,0 +1,47 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_APPLYBQSR_SPARK } from '../../../../../modules/mskcc/gatk4/applybqsrspark/main.nf' + +workflow test_gatk4_applybqsr_spark { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_spark_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_spark_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} diff --git a/tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config b/tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/gatk4/applybqsrspark/test.yml b/tests/modules/mskcc/gatk4/applybqsrspark/test.yml new file mode 100644 index 00000000..9bc3ad38 --- /dev/null +++ b/tests/modules/mskcc/gatk4/applybqsrspark/test.yml @@ -0,0 +1,28 @@ +- name: gatk4 applybqsrspark test_gatk4_applybqsr_spark + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.bam + md5sum: 1901c819fcba0fdd5e2482e6dc8285ef + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsrspark test_gatk4_applybqsr_spark_intervals + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_intervals -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.bam + md5sum: 2ca2446f0125890280056fd7da822732 + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsrspark test_gatk4_applybqsr_spark_cram + command: nextflow run ./tests/modules/mskcc/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_cram -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.cram + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/baserecalibrator/main.nf b/tests/modules/mskcc/gatk4/baserecalibrator/main.nf new file mode 100644 index 00000000..abbd1653 --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibrator/main.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_BASERECALIBRATOR } from '../../../../../modules/mskcc/gatk4/baserecalibrator/main.nf' + +workflow test_gatk4_baserecalibrator { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_multiple_sites { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + ] + sites_tbi = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + ] + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config b/tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/gatk4/baserecalibrator/test.yml b/tests/modules/mskcc/gatk4/baserecalibrator/test.yml new file mode 100644 index 00000000..15074804 --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibrator/test.yml @@ -0,0 +1,39 @@ +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: 9ecb5f00a2229291705addc09c0ec231 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibrator/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/baserecalibratorspark/main.nf b/tests/modules/mskcc/gatk4/baserecalibratorspark/main.nf new file mode 100644 index 00000000..0eb6e85a --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibratorspark/main.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_BASERECALIBRATOR_SPARK } from '../../../../../modules/mskcc/gatk4/baserecalibratorspark/main.nf' + +workflow test_gatk4_baserecalibrator_spark { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_multiple_sites { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + ] + sites_tbi = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + ] + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config b/tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/gatk4/baserecalibratorspark/test.yml b/tests/modules/mskcc/gatk4/baserecalibratorspark/test.yml new file mode 100644 index 00000000..79fe87f8 --- /dev/null +++ b/tests/modules/mskcc/gatk4/baserecalibratorspark/test.yml @@ -0,0 +1,39 @@ +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_cram + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_cram -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_intervals + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_intervals -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: 9ecb5f00a2229291705addc09c0ec231 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_multiple_sites + command: nextflow run ./tests/modules/mskcc/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/createsequencedictionary/main.nf b/tests/modules/mskcc/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..d9b4e18f --- /dev/null +++ b/tests/modules/mskcc/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,12 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../../../modules/mskcc/gatk4/createsequencedictionary/main.nf' + +workflow test_gatk4_createsequencedictionary { + fasta = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + GATK4_CREATESEQUENCEDICTIONARY ( fasta ) +} diff --git a/tests/modules/mskcc/gatk4/createsequencedictionary/nextflow.config b/tests/modules/mskcc/gatk4/createsequencedictionary/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/mskcc/gatk4/createsequencedictionary/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/mskcc/gatk4/createsequencedictionary/test.yml b/tests/modules/mskcc/gatk4/createsequencedictionary/test.yml new file mode 100644 index 00000000..beb1766b --- /dev/null +++ b/tests/modules/mskcc/gatk4/createsequencedictionary/test.yml @@ -0,0 +1,9 @@ +- name: gatk4 createsequencedictionary test_gatk4_createsequencedictionary + command: nextflow run ./tests/modules/mskcc/gatk4/createsequencedictionary -entry test_gatk4_createsequencedictionary -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/createsequencedictionary/nextflow.config + tags: + - gatk4 + - gatk4/createsequencedictionary + files: + - path: output/gatk4/genome.dict + md5sum: 7362679f176e0f52add03c08f457f646 + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/markduplicates/main.nf b/tests/modules/mskcc/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..c9101ee0 --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicates/main.nf @@ -0,0 +1,34 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_MARKDUPLICATES } from '../../../../../modules/mskcc/gatk4/markduplicates/main.nf' +include { GATK4_MARKDUPLICATES as GATK4_MARKDUPLICATES_CRAM } from '../../../../../modules/mskcc/gatk4/markduplicates/main.nf' + +workflow test_gatk4_markduplicates { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + + GATK4_MARKDUPLICATES ( input, [], [] ) +} + +workflow test_gatk4_markduplicates_multiple_bams { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + + GATK4_MARKDUPLICATES ( input, [], [] ) +} + +workflow test_gatk4_markduplicates_multiple_cram_output { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + GATK4_MARKDUPLICATES_CRAM ( input, fasta, fai ) +} diff --git a/tests/modules/mskcc/gatk4/markduplicates/nextflow.config b/tests/modules/mskcc/gatk4/markduplicates/nextflow.config new file mode 100644 index 00000000..5bdd5f04 --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicates/nextflow.config @@ -0,0 +1,15 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.bam" } + } + + withName: GATK4_MARKDUPLICATES_CRAM { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.cram" } + } + +} diff --git a/tests/modules/mskcc/gatk4/markduplicates/test.yml b/tests/modules/mskcc/gatk4/markduplicates/test.yml new file mode 100644 index 00000000..cab00e96 --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicates/test.yml @@ -0,0 +1,36 @@ +- name: gatk4 markduplicates test_gatk4_markduplicates + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicates -entry test_gatk4_markduplicates -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicates/nextflow.config + tags: + - gatk4 + - gatk4/markduplicates + files: + - path: output/gatk4/test.bai + md5sum: 26001bcdbce12e9f07557d8f7b8d360e + - path: output/gatk4/test.bam + md5sum: 2e17dfa6db576fd87be8b36fa2133c73 + - path: output/gatk4/test.bam.metrics + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicates/nextflow.config + tags: + - gatk4 + - gatk4/markduplicates + files: + - path: output/gatk4/test.bai + md5sum: 529fbbad54edf512a1249c9be4258fba + - path: output/gatk4/test.bam + md5sum: f178378cbc335ab2447f49f32b767083 + - path: output/gatk4/test.bam.metrics + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_cram_output + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_cram_output -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicates/nextflow.config + tags: + - gatk4 + - gatk4/markduplicates + files: + - path: output/gatk4/test.cram + - path: output/gatk4/test.cram.crai + - path: output/gatk4/test.cram.metrics + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/gatk4/markduplicatesspark/main.nf b/tests/modules/mskcc/gatk4/markduplicatesspark/main.nf new file mode 100644 index 00000000..830e9abb --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicatesspark/main.nf @@ -0,0 +1,57 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_MARKDUPLICATES_SPARK } from '../../../../../modules/mskcc/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../../modules/mskcc/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../../modules/mskcc/gatk4/markduplicatesspark/main.nf' + +workflow test_gatk4_markduplicates_spark { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) +} diff --git a/tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config b/tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config new file mode 100644 index 00000000..1b948a6a --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config @@ -0,0 +1,19 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } + } + +} +// override ./tests/config/nextflow.config +docker.userEmulation = false + diff --git a/tests/modules/mskcc/gatk4/markduplicatesspark/test.yml b/tests/modules/mskcc/gatk4/markduplicatesspark/test.yml new file mode 100644 index 00000000..297d4b9f --- /dev/null +++ b/tests/modules/mskcc/gatk4/markduplicatesspark/test.yml @@ -0,0 +1,43 @@ +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4/markduplicatesspark + - gatk4 + files: + - path: output/gatk4/test.bam + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + - path: output/gatk4/test.bam.bai + md5sum: 253c47e57247a2cee11afcbb414122a4 + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4/markduplicatesspark + - gatk4 + files: + - path: output/gatk4/test.bam + - path: output/gatk4/test.bam.bai + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4/markduplicatesspark + - gatk4 + files: + - path: output/gatk4/test.cram + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/mskcc/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4/markduplicatesspark + - gatk4 + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/test.bam.bai + md5sum: 7f7e858d1ded1cca89b373eb817fcb45 + - path: output/gatk4/test.metrics + - path: output/gatk4/versions.yml diff --git a/tests/modules/mskcc/samtools/faidx/main.nf b/tests/modules/mskcc/samtools/faidx/main.nf new file mode 100644 index 00000000..00d0963b --- /dev/null +++ b/tests/modules/mskcc/samtools/faidx/main.nf @@ -0,0 +1,49 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SAMTOOLS_FAIDX } from '../../../../../modules/mskcc/samtools/faidx/main.nf' + +workflow test_samtools_faidx { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + SAMTOOLS_FAIDX ( input, [[],[]] ) +} + +workflow test_samtools_faidx_bgzip { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + + SAMTOOLS_FAIDX ( input, [[],[]] ) +} + +workflow test_samtools_faidx_fasta { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + fai = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + + SAMTOOLS_FAIDX ( input, fai ) +} + +workflow test_samtools_faidx_stub_fasta { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + fai = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + + SAMTOOLS_FAIDX ( input, fai ) +} + +workflow test_samtools_faidx_stub_fai { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + + SAMTOOLS_FAIDX ( input, [[],[]] ) +} diff --git a/tests/modules/mskcc/samtools/faidx/nextflow.config b/tests/modules/mskcc/samtools/faidx/nextflow.config new file mode 100644 index 00000000..cb1472e0 --- /dev/null +++ b/tests/modules/mskcc/samtools/faidx/nextflow.config @@ -0,0 +1,11 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'test_samtools_faidx_fasta:SAMTOOLS_FAIDX' { + ext.args = 'MT192765.1 -o extract.fa' + } + withName: 'test_samtools_faidx_stub_fasta:SAMTOOLS_FAIDX' { + ext.args = '-o extract.fa' + } +} diff --git a/tests/modules/mskcc/samtools/faidx/test.yml b/tests/modules/mskcc/samtools/faidx/test.yml new file mode 100644 index 00000000..63016feb --- /dev/null +++ b/tests/modules/mskcc/samtools/faidx/test.yml @@ -0,0 +1,48 @@ +- name: samtools faidx test_samtools_faidx + command: nextflow run ./tests/modules/mskcc/samtools/faidx -entry test_samtools_faidx -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/faidx/nextflow.config + tags: + - samtools/faidx + - samtools + files: + - path: output/samtools/genome.fasta.fai + md5sum: 9da2a56e2853dc8c0b86a9e7229c9fe5 + - path: output/samtools/versions.yml + +- name: samtools faidx test_samtools_faidx_bgzip + command: nextflow run ./tests/modules/mskcc/samtools/faidx -entry test_samtools_faidx_bgzip -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/faidx/nextflow.config + tags: + - samtools/faidx + - samtools + files: + - path: output/samtools/genome.fasta.gz.fai + md5sum: 9da2a56e2853dc8c0b86a9e7229c9fe5 + - path: output/samtools/genome.fasta.gz.gzi + md5sum: 7dea362b3fac8e00956a4952a3d4f474 + - path: output/samtools/versions.yml + +- name: samtools faidx test_samtools_faidx_fasta + command: nextflow run ./tests/modules/mskcc/samtools/faidx -entry test_samtools_faidx_fasta -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/faidx/nextflow.config + tags: + - samtools/faidx + - samtools + files: + - path: output/samtools/extract.fa + - path: output/samtools/versions.yml + +- name: samtools faidx test_samtools_faidx_stub_fasta + command: nextflow run ./tests/modules/mskcc/samtools/faidx -entry test_samtools_faidx_stub_fasta -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/faidx/nextflow.config -stub + tags: + - samtools/faidx + - samtools + files: + - path: output/samtools/extract.fa + - path: output/samtools/versions.yml + +- name: samtools faidx test_samtools_faidx_stub_fai + command: nextflow run ./tests/modules/mskcc/samtools/faidx -entry test_samtools_faidx_stub_fai -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/faidx/nextflow.config -stub + tags: + - samtools/faidx + - samtools + files: + - path: output/samtools/genome.fasta.fai + - path: output/samtools/versions.yml diff --git a/tests/modules/mskcc/samtools/index/main.nf b/tests/modules/mskcc/samtools/index/main.nf new file mode 100644 index 00000000..a86b78c3 --- /dev/null +++ b/tests/modules/mskcc/samtools/index/main.nf @@ -0,0 +1,31 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BAI } from '../../../../../modules/mskcc/samtools/index/main.nf' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CRAI } from '../../../../../modules/mskcc/samtools/index/main.nf' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CSI } from '../../../../../modules/mskcc/samtools/index/main.nf' + +workflow test_samtools_index_bai { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + + SAMTOOLS_INDEX_BAI ( input ) +} + +workflow test_samtools_index_crai { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + + SAMTOOLS_INDEX_CRAI ( input ) +} + +workflow test_samtools_index_csi { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + + SAMTOOLS_INDEX_CSI ( input ) +} diff --git a/tests/modules/mskcc/samtools/index/nextflow.config b/tests/modules/mskcc/samtools/index/nextflow.config new file mode 100644 index 00000000..d3a4c785 --- /dev/null +++ b/tests/modules/mskcc/samtools/index/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: SAMTOOLS_INDEX_CSI { + ext.args = '-c' + } + +} diff --git a/tests/modules/mskcc/samtools/index/test.yml b/tests/modules/mskcc/samtools/index/test.yml new file mode 100644 index 00000000..511b4de8 --- /dev/null +++ b/tests/modules/mskcc/samtools/index/test.yml @@ -0,0 +1,29 @@ +- name: samtools index test_samtools_index_bai + command: nextflow run ./tests/modules/mskcc/samtools/index -entry test_samtools_index_bai -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/index/nextflow.config + tags: + - samtools/index + - samtools + files: + - path: output/samtools/test.paired_end.sorted.bam.bai + md5sum: 704c10dd1326482448ca3073fdebc2f4 + - path: output/samtools/versions.yml + +- name: samtools index test_samtools_index_crai + command: nextflow run ./tests/modules/mskcc/samtools/index -entry test_samtools_index_crai -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/index/nextflow.config + tags: + - samtools/index + - samtools + files: + - path: output/samtools/test.paired_end.recalibrated.sorted.cram.crai + md5sum: 14bc3bd5c89cacc8f4541f9062429029 + - path: output/samtools/versions.yml + +- name: samtools index test_samtools_index_csi + command: nextflow run ./tests/modules/mskcc/samtools/index -entry test_samtools_index_csi -c ./tests/config/nextflow.config -c ./tests/modules/mskcc/samtools/index/nextflow.config + tags: + - samtools/index + - samtools + files: + - path: output/samtools/test.paired_end.sorted.bam.csi + md5sum: 8d63373007553e74d823fc2b9cbcf84d + - path: output/samtools/versions.yml diff --git a/tests/subworkflows/mskcc/bwa_markdup_bqsr/main.nf b/tests/subworkflows/mskcc/bwa_markdup_bqsr/main.nf new file mode 100644 index 00000000..866226cc --- /dev/null +++ b/tests/subworkflows/mskcc/bwa_markdup_bqsr/main.nf @@ -0,0 +1,38 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BWA_INDEX } from '../../../../modules/mskcc/bwa/index/main.nf' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../../modules/mskcc/gatk4/createsequencedictionary/main.nf' +include { SAMTOOLS_FAIDX } from '../../../../modules/mskcc/samtools/faidx/main.nf' +include { BWA_MARKDUP_BQSR } from '../../../../subworkflows/mskcc/bwa_markdup_bqsr/main.nf' + +workflow test_bwa_markdup_bqsr { + + input = [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + BWA_INDEX ( [[id: 'testfa'],fasta] ) + SAMTOOLS_FAIDX ( [[id: 'testfa'],fasta],[[],[]] ) + GATK4_CREATESEQUENCEDICTIONARY([[id: 'testfa'],fasta]) + + BWA_MARKDUP_BQSR( + input, + fasta, + SAMTOOLS_FAIDX.out.fai.map{ it[1] }.first(), + BWA_INDEX.out.index, + GATK4_CREATESEQUENCEDICTIONARY.out.dict.map{ it[1] }.first(), + [], + [], + false + + + ) +} diff --git a/tests/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config b/tests/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/subworkflows/mskcc/bwa_markdup_bqsr/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/subworkflows/mskcc/bwa_markdup_bqsr/test.yml b/tests/subworkflows/mskcc/bwa_markdup_bqsr/test.yml new file mode 100644 index 00000000..16195a4d --- /dev/null +++ b/tests/subworkflows/mskcc/bwa_markdup_bqsr/test.yml @@ -0,0 +1,36 @@ +- name: bwa_markdup_bqsr test_bwa_markdup_bqsr + command: nextflow run ./tests/subworkflows/mskcc/bwa_markdup_bqsr -entry test_bwa_markdup_bqsr -c ./tests/config/nextflow.config + tags: + - bwa + - bwa/mem + - gatk4 + - gatk4/applybqsr + - gatk4/applybqsr/spark + - gatk4/baserecalibrator + - gatk4/baserecalibrator/spark + - gatk4/markduplicates + - gatk4/markduplicates/spark + - samtools + - samtools/index + - subworkflows + - subworkflows/bwa_markdup_bqsr + files: + - path: output/bwa/bwa/genome.amb + md5sum: 3a68b8b2287e07dd3f5f95f4344ba76e + - path: output/bwa/bwa/genome.ann + md5sum: c32e11f6c859f166c7525a9c1d583567 + - path: output/bwa/bwa/genome.bwt + md5sum: 0469c30a1e239dd08f68afe66fde99da + - path: output/bwa/bwa/genome.pac + md5sum: 983e3d2cd6f36e2546e6d25a0da78d66 + - path: output/bwa/bwa/genome.sa + md5sum: ab3952cabf026b48cd3eb5bccbb636d1 + - path: output/bwa/test.bam + md5sum: cf74076b67bb4c9b9c7b2d88964bf5d9 + - path: output/gatk4/genome.dict + md5sum: 7362679f176e0f52add03c08f457f646 + - path: output/gatk4/test.bam.metrics + contains: + - "picard.sam.DuplicationMetrics" + - path: output/samtools/genome.fasta.fai + md5sum: 9da2a56e2853dc8c0b86a9e7229c9fe5