Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v6.0.0
hooks:
- id: check-yaml
- repo: https://github.com/snakemake/snakefmt
rev: v0.6.1
rev: v2.0.1
hooks:
- id: snakefmt
- repo: https://github.com/psf/black
rev: 22.6.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 26.5.1
hooks:
- id: black
- repo: https://github.com/PyCQA/isort.git
rev: 5.10.1
rev: 9.0.0a3
hooks:
- id: isort
- repo: https://github.com/python-poetry/poetry
Expand Down
2 changes: 0 additions & 2 deletions mccoy/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import yaml
import snakemake
import pathlib


PROJECT_DIR = Path(config['project_path'])
RESOURCES_DIR = PROJECT_DIR / "resources"
INPUT_DATA = config['data']
Expand Down Expand Up @@ -44,7 +43,6 @@ onstart:
print("Workflow paths:")
print(f"\t{'snakefile':20s} ➡ {workflow.snakefile}")
print(f"\t{'working directory':20s} ➡ {workflow.basedir}")

print("Environment:")
shell = lambda cmd: subprocess.run(cmd, shell=True, stdout=subprocess.PIPE).stdout.decode().rstrip()
print(f"\t{shell('python --version'):20s} ➡ {shell('which python')}")
Expand Down
50 changes: 25 additions & 25 deletions mccoy/workflow/rules/align.smk
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
rule align:
"""
Use `MAFFT <https://github.com/GSLBiotech/mafft>`_ to align the combined sequence file against the project
reference.
Use `MAFFT <https://github.com/GSLBiotech/mafft>`_ to align the combined sequence file against the project
reference.

:input original: the combined sequence file generated from the :smk:ref:`combine` rule
:input reference: the project reference sequence, provided during McCoy project creation
:input original: the combined sequence file generated from the :smk:ref:`combine` rule
:input reference: the project reference sequence, provided during McCoy project creation

:config align.mafft: a list of command line arguments passed directly to MAFFT
:config align.threads: the number of threads (cores) to use for a single MAFFT call
:config align.resources: the resources to request when submitting to a cluster
:config align.mafft: a list of command line arguments passed directly to MAFFT
:config align.threads: the number of threads (cores) to use for a single MAFFT call
:config align.resources: the resources to request when submitting to a cluster

:output: the aligned version of the original input file
:params: the command-line arguments passed to MAFFT (set in `align.mafft` config entry)
:threads: set to `align.threads` from the config file if present, else set by the number of cores available to the workflow (up to `threads_max`)
:resources: set to `align.resources` in the project config, if present
"""
:output: the aligned version of the original input file
:params: the command-line arguments passed to MAFFT (set in `align.mafft` config entry)
:threads: set to `align.threads` from the config file if present, else set by the number of cores available to the workflow (up to `threads_max`)
:resources: set to `align.resources` in the project config, if present
"""
input:
original="data/combined/{id}.fasta",
reference=RESOURCES_DIR / "reference.fasta",
Expand All @@ -24,25 +24,23 @@ rule align:
"logs/align-{id}.txt",
conda:
"../envs/mafft.yml"
params:
lambda wildcards: " ".join(config["align"]["mafft"]),
threads: config["align"].get("threads", config["all"]["threads_max"])
resources:
**config["align"].get("resources", {}),
params:
lambda wildcards: " ".join(config["align"]["mafft"]),
shell:
"""
REFNAME=$(head -n1 {input.reference} | tr -d '>')
mafft --thread {threads} {params} {input.original} {input.reference} 2> {log} \
| seqkit grep -rvip "^$REFNAME" > {output} 2> {log}
mafft --thread {threads} {params} {input.original} {input.reference} 2>{log} \
| seqkit grep -rvip "^$REFNAME" >{output} 2>{log}
"""


rule alignment_stats:
input:
alignment=rules.align.output,
reference=RESOURCES_DIR / "reference.fasta",
conda:
"../envs/steenwyk.yml"
output:
summary="results/aligned/{id}.summary.txt",
gc_content="results/aligned/{id}.gc_content.txt",
Expand All @@ -51,13 +49,15 @@ rule alignment_stats:
pairwise_identity_verbose="results/aligned/{id}.pairwise_identity_verbose.txt",
# position_specific_score_matrix="results/aligned/{id}.position_specific_score_matrix.txt",
# sum_of_pairs_score="results/aligned/{id}.sum_of_pairs_score.txt",
conda:
"../envs/steenwyk.yml"
shell:
"""
biokit alignment_summary {input.alignment} > {output.summary}
phykit gc_content {input.alignment} > {output.gc_content}
phykit relative_composition_variability {input.alignment} > {output.relative_composition_variability}
phykit pairwise_identity {input.alignment} > {output.pairwise_identity}
phykit pairwise_identity {input.alignment} --verbose > {output.pairwise_identity_verbose}
biokit alignment_summary {input.alignment} >{output.summary}
phykit gc_content {input.alignment} >{output.gc_content}
phykit relative_composition_variability {input.alignment} >{output.relative_composition_variability}
phykit pairwise_identity {input.alignment} >{output.pairwise_identity}
phykit pairwise_identity {input.alignment} --verbose >{output.pairwise_identity_verbose}

# phykit sum_of_pairs_score {input.alignment} --reference {input.reference} > output.sum_of_pairs_score
# biokit position_specific_score_matrix {input.alignment} > output.position_specific_score_matrix
Expand All @@ -67,11 +67,11 @@ rule alignment_stats:
rule pairwise_identity_histogram:
input:
rules.alignment_stats.output.pairwise_identity_verbose,
conda:
"../envs/plot_traces.yml"
output:
svg="results/aligned/{id}.pairwise_identity_verbose.svg",
html="results/aligned/{id}.pairwise_identity_verbose.html",
conda:
"../envs/plot_traces.yml"
shell:
"""
${{CONDA_PREFIX}}/bin/python {SCRIPT_DIR}/pairwise_identity_histogram.py {input} {output.svg} {output.html}
Expand Down
99 changes: 50 additions & 49 deletions mccoy/workflow/rules/beast.smk
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
rule onlinebeast:
"""
Use `online-beast <https://github.com/Wytamma/online-beast>`_ to add any new
sequences to the Beast2 analysis from an inherited run and update the state.
Use `online-beast <https://github.com/Wytamma/online-beast>`_ to add any new
sequences to the Beast2 analysis from an inherited run and update the state.

.. warning::
This rule will only run if the ``--inherit`` or ``--inherit-last`` flags are passed to McCoy.
.. warning::
This rule will only run if the ``--inherit`` or ``--inherit-last`` flags are passed to McCoy.


:input xml: the template file generated by the :smk:ref:`dynamicbeast` rule
:input state: the statefile from the inherited McCoy run.
This is copied into the ``data`` directory by the McCoy CLI.
:input alignment: the aligned sequences from the :smk:ref:`align` rule
:input xml: the template file generated by the :smk:ref:`dynamicbeast` rule
:input state: the statefile from the inherited McCoy run.
This is copied into the ``data`` directory by the McCoy CLI.
:input alignment: the aligned sequences from the :smk:ref:`align` rule

:output: the updated state file produced by online-beast.
:output: the updated state file produced by online-beast.

**Note:** No XML file is produced as we are using a template XML which
doesn't actually contain the sequences in it.
"""
**Note:** No XML file is produced as we are using a template XML which
doesn't actually contain the sequences in it.
"""
input:
xml=rules.dynamicbeast.output,
state="data/{id}-beast.xml.state",
Expand Down Expand Up @@ -44,34 +44,35 @@ def beast_params(wildcards):

rule beast:
"""
Run Beast2, either restarting from a state file or from scratch.
Run Beast2, either restarting from a state file or from scratch.

.. note::
GPU acceleration is requested if available by default. If you are running on a machine with a compatible GPU then
the code will crash when using the bioconda package. To avoid this, either:
.. note::
GPU acceleration is requested if available by default. If you are running on a machine with a compatible GPU then
the code will crash when using the bioconda package. To avoid this, either:

1. ensure you pass ``--use-envmodules`` to McCoy and set the ``envmodules`` directives of this rule appropriately, or
2. remove the ``-beagle_GPU`` flag from the ``beast.beast`` entry in your McCoy config file.
1. ensure you pass ``--use-envmodules`` to McCoy and set the ``envmodules`` directives of this rule appropriately, or
2. remove the ``-beagle_GPU`` flag from the ``beast.beast`` entry in your McCoy config file.

:input alignment: the aligned fasta file output from :smk:ref:`align`
:input template: the Beast 2 input XML file, templated with `feast <https://github.com/tgvaughan/feast>`_.
If ``inherit`` is set in the config then the output of the :smk:ref:`onlinebeast` rule is used,
otherwise the output of the :smk:ref:`dynamicbeast` rule is used.
:input alignment: the aligned fasta file output from :smk:ref:`align`
:input template: the Beast 2 input XML file, templated with `feast <https://github.com/tgvaughan/feast>`_.
If ``inherit`` is set in the config then the output of the :smk:ref:`onlinebeast` rule is used,
otherwise the output of the :smk:ref:`dynamicbeast` rule is used.

:output: the tree log, trace log, and statefile from Beast2
:output: the tree log, trace log, and statefile from Beast2

:config inherit: are we inheriting from a previous run?
:config beast.dynamic: the dynamic variables used to populate the feast template.
:config beast.beast: Beast2 command line arguments to pass (beyond the params, statefile and input)
:config beast.threads: the number of cores to run with (both locally or when submitting to a cluster)
:config beast.resources: the resources to request when submitting to a cluster
:config inherit: are we inheriting from a previous run?
:config beast.dynamic: the dynamic variables used to populate the feast template.
:config beast.beast: Beast2 command line arguments to pass (beyond the params, statefile and input)
:config beast.threads: the number of cores to run with (both locally or when submitting to a cluster)
:config beast.resources: the resources to request when submitting to a cluster

:envmodules: environment variables to load for the Spartan HPC system
:envmodules: environment variables to load for the Spartan HPC system

.. note::
GPU acceleration is **not** requested by default. If you are running on a machine with a compatible GPU then
please replace ``-beagle`` with ``-beagle_GPU`` in the ``beast.beast`` entry in your McCoy ``config.yaml`` file.
"""
.. note::
GPU acceleration is **not** requested by default. If you are running on a machine with a compatible GPU then
please replace ``-beagle`` with ``-beagle_GPU`` in the ``beast.beast`` entry in your McCoy ``config.yaml`` file.

"""
input:
alignment=rules.align.output,
template=rules.dynamicbeast.output,
Expand All @@ -84,25 +85,25 @@ rule beast:
"logs/{id}_beast.log",
conda:
"../envs/beast.yml"
params:
dynamic=lambda wildcards: ",".join(config["beast"]["dynamic"]),
beast=beast_params,
envmodules:
*config["beast"].get("envmodules", []),
threads: config["beast"].get("threads", config["all"]["threads_max"])
resources:
**config["beast"].get("resources", {}),
envmodules:
*config["beast"].get("envmodules", []),
params:
dynamic=lambda wildcards: ",".join(config["beast"]["dynamic"]),
beast=beast_params,
shell:
"""
if [[ -n "{input.statefile}" ]]; then cp {input.statefile} {output.statefile}; fi
beast -D 'alignment={input.alignment},tracelog={output.tracelog},treelog={output.treelog},mcmc.threads={threads},{params.dynamic}' {params.beast} -statefile {output.statefile} {input.template} 1>&2 2> {log}
beast -D 'alignment={input.alignment},tracelog={output.tracelog},treelog={output.treelog},mcmc.threads={threads},{params.dynamic}' {params.beast} -statefile {output.statefile} {input.template} 1>&2 2>{log}
"""


rule plot_traces:
"""
Makes trace plots from the beast log file.
"""
Makes trace plots from the beast log file.
"""
input:
expand(rules.beast.output.tracelog, id=config['id']),
output:
Expand All @@ -117,8 +118,8 @@ rule plot_traces:

rule arviz:
"""
Makes trace plots from the beast log file.
"""
Makes trace plots from the beast log file.
"""
input:
expand(rules.beast.output.tracelog, id=config['id']),
output:
Expand All @@ -135,8 +136,8 @@ rule arviz:

rule max_clade_credibility_tree:
"""
Makes trace plots from the beast log file.
"""
Makes trace plots from the beast log file.
"""
input:
expand(rules.beast.output.treelog, id=config['id']),
output:
Expand All @@ -151,8 +152,8 @@ rule max_clade_credibility_tree:

rule max_clade_credibility_tree_newick:
"""
Makes trace plots from the beast log file.
"""
Makes trace plots from the beast log file.
"""
input:
expand(rules.max_clade_credibility_tree.output, id=config['id']),
output:
Expand All @@ -165,8 +166,8 @@ rule max_clade_credibility_tree_newick:

rule max_clade_credibility_tree_render:
"""
Renders the maximum clade credibility tree in SVG and HTML format.
"""
Renders the maximum clade credibility tree in SVG and HTML format.
"""
input:
expand(rules.max_clade_credibility_tree_newick.output, id=config['id']),
output:
Expand Down
10 changes: 5 additions & 5 deletions mccoy/workflow/rules/combine.smk
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
rule combine:
"""
Combine multiple sequence files together into a single file.
Combine multiple sequence files together into a single file.

:input data: the sequence files to be concatenated
:output: a single concatenated fasta file
"""
:input data: the sequence files to be concatenated
:output: a single concatenated fasta file
"""
input:
data=INPUT_DATA,
output:
Expand All @@ -15,5 +15,5 @@ rule combine:
"../envs/combine.yml"
shell:
"""
cat {input.data} | sed s/\@/_/g | seqkit rmdup -n -o {output} 2> {log}
cat {input.data} | sed s/\@/_/g | seqkit rmdup -n -o {output} 2>{log}
"""
20 changes: 10 additions & 10 deletions mccoy/workflow/rules/dynamicbeast.smk
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
rule dynamicbeast:
"""
Use `dynamicbeast <https://github.com/Wytamma/dynamic-beast>`_ to generate
a dynamic BEAST2 XML template from a standard static one, for use with
`feast <https://github.com/tgvaughan/feast>`_.
Use `dynamicbeast <https://github.com/Wytamma/dynamic-beast>`_ to generate
a dynamic BEAST2 XML template from a standard static one, for use with
`feast <https://github.com/tgvaughan/feast>`_.

:input template: the BEAST XML template file
:input phytest_report: the phytest report file generated by the :smk:ref:`phytest` rule.
By using this as an input, we ensure that this rule (and all downstream rules)
only run when our quality control checks pass.
:input template: the BEAST XML template file
:input phytest_report: the phytest report file generated by the :smk:ref:`phytest` rule.
By using this as an input, we ensure that this rule (and all downstream rules)
only run when our quality control checks pass.

:output: the dynamic XML template file to be used by the :smk:ref:`onlinebeast` and/or :smk:ref:`beast` rules
"""
:output: the dynamic XML template file to be used by the :smk:ref:`onlinebeast` and/or :smk:ref:`beast` rules
"""
input:
template=RESOURCES_DIR / "template.xml",
phytest_report="results/{id}-phytest.html",
Expand All @@ -22,5 +22,5 @@ rule dynamicbeast:
"../envs/dynamicbeast.yml"
shell:
"""
dynamic-beast {input.template} > {output} 2> {log}
dynamic-beast {input.template} >{output} 2>{log}
"""
14 changes: 7 additions & 7 deletions mccoy/workflow/rules/phytest.smk
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
rule phytest:
"""
Run quality control checks using `phytest <https://github.com/phytest-devs/phytest>`_.
Run quality control checks using `phytest <https://github.com/phytest-devs/phytest>`_.

:input alignment: the aligned fasta file from the :smk:ref:`align` rule
:input tree: the maximum likelihood tree generated by the :smk:ref:`tree` rule
:input phytest_file: the phytest test file
:input alignment: the aligned fasta file from the :smk:ref:`align` rule
:input tree: the maximum likelihood tree generated by the :smk:ref:`tree` rule
:input phytest_file: the phytest test file

:output: The phytest html report, placed into the Snakemake report
"""
:output: The phytest html report, placed into the Snakemake report
"""
input:
alignment="results/aligned/{id}.fasta",
tree="results/tree/{id}.fasta.treefile",
Expand All @@ -20,5 +20,5 @@ rule phytest:
"../envs/phytest.yml"
shell:
"""
phytest {input.phytest_file} -s {input.alignment} -t {input.tree} --report {output} -v > {log}
phytest {input.phytest_file} -s {input.alignment} -t {input.tree} --report {output} -v >{log}
"""
Loading
Loading