From 98f9ea1f1121d250613b23be2d07c5a808e5c282 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Sat, 21 Mar 2026 14:40:24 +0100 Subject: [PATCH 1/2] use new sequana-wrappers --- README.rst | 174 ++++++++-------- pyproject.toml | 8 +- sequana_pipelines/multicov/__init__.py | 9 +- sequana_pipelines/multicov/config.yaml | 4 + sequana_pipelines/multicov/main.py | 186 ++++++------------ sequana_pipelines/multicov/multicov.rules | 73 ++++++- .../multicov/{requirements.txt => tools.txt} | 0 setup.cfg | 14 -- setup.py | 70 ------- test/test_main.py | 36 ++-- 10 files changed, 252 insertions(+), 322 deletions(-) rename sequana_pipelines/multicov/{requirements.txt => tools.txt} (100%) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/README.rst b/README.rst index 17ac4ab..dbc7245 100644 --- a/README.rst +++ b/README.rst @@ -1,41 +1,41 @@ -This is is the **coverage** pipeline from the `Sequana `_ project - .. image:: https://badge.fury.io/py/sequana-multicov.svg :target: https://pypi.python.org/pypi/sequana_multicov +.. image:: https://github.com/sequana/multicov/actions/workflows/main.yml/badge.svg + :target: https://github.com/sequana/multicov/actions/workflows/main.yml + +.. image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue.svg + :target: https://pypi.python.org/pypi/sequana_multicov + :alt: Python 3.8 | 3.9 | 3.10 + .. image:: http://joss.theoj.org/papers/10.21105/joss.00352/status.svg :target: http://joss.theoj.org/papers/10.21105/joss.00352 :alt: JOSS (journal of open source software) DOI -.. image:: https://github.com/sequana/multicov/actions/workflows/main.yml/badge.svg - :target: https://github.com/sequana/multicov/actions/workflows - +This is the **multicov** pipeline from the `Sequana `_ project -:Overview: Parallelised version of sequana_coverage for large eukaryotes genome. -:Input: A set of BAM or BED files. BED file must have 3 or 4 columns. 
First column is - the chromosome/contig name, second column stored positions and third the - coverage. Fourth optional columns contains a filtered coverage (not used in - the analysis but shown in the HTML reports) -:Output: a set of HTML reports for each chromosomes and a multiqc report -:Status: production -:Citation: +:Overview: Parallelised version of sequana_coverage for multi-sample genomic coverage analysis and CNV detection +:Input: A set of BED files (3 or 4 columns: chromosome, position, coverage, optional filtered coverage) +:Output: Per-sample HTML coverage reports, a MultiQC report, and a summary.html with links to all reports +:Status: Production +:Documentation: This README file and https://sequana.readthedocs.io +:Citation: Dimitri Desvillechabrol, Christiane Bouchier, Sean Kennedy, Thomas Cokelaer - *Sequana coverage: detection and characterization of genomic variations + *Sequana coverage: detection and characterization of genomic variations using running median and mixture models* - GigaScience, Volume 7, Issue 12, December 2018, giy110, + GigaScience, Volume 7, Issue 12, December 2018, giy110, https://doi.org/10.1093/gigascience/giy110 - and + and - Cokelaer et al, (2017), ‘Sequana’: a Set of Snakemake NGS pipelines, Journal of Open Source Software, 2(16), 352, JOSS DOI https://doi:10.21105/joss.00352 + Cokelaer et al, (2017), 'Sequana': a Set of Snakemake NGS pipelines, Journal of Open Source Software, 2(16), 352, JOSS DOI https://doi.org/10.21105/joss.00352 Installation ~~~~~~~~~~~~ - -sequana_multicov is based on Python3, just install the package as follows:: +If you already have all requirements, install the package using pip:: pip install sequana_multicov --upgrade @@ -43,109 +43,107 @@ sequana_multicov is based on Python3, just install the package as follows:: Usage ~~~~~ -:: +Scan BED files in a directory and set up the pipeline (replace ``DATAPATH`` with your input directory):: - sequana_multicov --help - sequana_multicov --input-directory 
DATAPATH + sequana_multicov --input-directory DATAPATH -By default, this looks for BED file. WARNING. This are BED3 meaning a 3-columns -tabulated file like this one:: +To provide a reference FASTA file for GC content plots:: - chr1 1 10 - chr1 2 11 - ... - chr1 N1 10 - chr2 1 20 - chr2 2 21 - ... - chr2 N2 20 + sequana_multicov --input-directory DATAPATH --reference-file genome.fa -where the first column stored the chromosome name, the second is the position -and the third is the coverage itself. See sequana_coverage documentation for -details. If you have BAM files as input, we will do the conversion for you. In -such case, use this option:: +To provide a GenBank annotation file for event annotation:: - --input-pattern "*.bam" + sequana_multicov --input-directory DATAPATH --annotation-file genome.gbk -The sequana_coverage script creates a directory with the pipeline and -its configuration file. You will then need -to execute the pipeline:: +This creates a ``multicov/`` directory with the pipeline and configuration file. Execute the pipeline locally:: - cd coverage - sh coverage.sh # for a local run + cd multicov + sh multicov.sh -This launch a snakemake pipeline. If you are familiar with snakemake, you can -retrieve the pipeline itself and its configuration files and then execute the pipeline yourself with specific parameters:: +If you are familiar with Snakemake, you can also run the pipeline directly:: - snakemake -s multicov.rules -c config.yaml --cores 4 --stats stats.txt + snakemake -s multicov.rules --cores 4 --stats stats.txt -Or use `sequanix `_ interface as follows:: +See ``.sequana/profile/config.yaml`` to tune Snakemake behaviour (cores, cluster settings, etc.). - sequanix -w analysis -i . -p coverage +Usage with apptainer +~~~~~~~~~~~~~~~~~~~~~ -Go to the second panel, in Input data and then in Input directory. 
There, you -must modify the pattern (empty field by default meaning search for fastq files) -and set the field to either:: +With apptainer, initiate the working directory as follows:: - *.bed + sequana_multicov --input-directory DATAPATH --use-apptainer -or:: +Images are downloaded in the working directory but you can store them in a shared location:: - *.bam + sequana_multicov --input-directory DATAPATH --use-apptainer --apptainer-prefix ~/.sequana/apptainers +and then:: -You are ready to go. Save the project and press Run. Once done, open the HTML report. + cd multicov + sh multicov.sh -Requirements -~~~~~~~~~~~~ +Input format +~~~~~~~~~~~~~ -This pipelines requires the following executable(s): +BED files must have 3 or 4 tab-separated columns:: -- sequana_coverage from **Sequana**, which should be installed automatically. -- multiqc + chr1 1 10 + chr1 2 11 + ... + chr2 1 20 + chr2 2 21 + ... -.. .. image:: https://raw.githubusercontent.com/sequana/multicov/master/sequana_pipelines/multicov/dag.png +where the first column is the chromosome/contig name, the second is the position (1-based, sorted), and the third is the coverage depth. An optional fourth column may contain a filtered coverage signal (shown in reports but not used in the analysis). +If you only have BAM files, convert them with:: -Details -~~~~~~~~~ + samtools depth -aa input.bam > output.bed -This pipeline runs **coverage** in parallel on the input BAM files (or BED file). +For a specific chromosome only:: + samtools depth -aa -r chr1 input.bam > chr1.bed -The coverage tool takes as input a BAM or a BED file. The BED file must have 3 -or 4 columns as explained in the standalone application (sequana_coverage) -`documentation `_. -In short, the first column is the chromosome name, the second column is the -position (sorted) and the third column is the coverage (an optional fourth -column would contain a coverage signal, which could be high quality coverage for -instance). 
+For CRAM files, convert to BAM first:: -If you have only BAM files, you can convert them using **bioconvert** tool or -the command:: + samtools view -@ 4 -T reference.fa -b -o out.bam in.cram - samtools depth -aa input.bam > output.bed -If you have a CRAM file:: +Requirements +~~~~~~~~~~~~ + +This pipeline requires the following executables: + +- **sequana_coverage** — from the `Sequana `_ package (installed automatically) +- **multiqc** — aggregated HTML report across samples + +Install all dependencies at once:: - samtools view -@ 4 -T reference.fa -b -o out.bam in.cram + mamba env create -f environment.yml -For very large BAM/BED files, we recommend to split the BED file by -chromosomes. For instance for the chromosome chr1, type:: - # samtools index in.bam - samtools depth -aa input.bam -r chr1 in.bam > chr1.bed +Details +~~~~~~~~~ + +This pipeline runs **sequana_coverage** in parallel across all input BED files. For each sample it produces a standalone HTML report with: + +- coverage plots and running-median normalisation +- ROI (region of interest) detection using z-score thresholds +- CNV clustering +- GC content overlay (when a reference FASTA is provided) +- Event annotation (when a GenBank file is provided) -The standalone or Snakemake application can also take as input your BAM file and -will convert it automatically into a BED file. +On success, a ``summary.html`` is generated listing all samples with direct links to their individual reports, plus a MultiQC report aggregating key statistics across samples. + +For very large genomes the ``--binning`` and ``--chunksize`` options can be used to reduce memory usage. Rules and configuration details ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Here is the `latest documented configuration file `_ -to be used with the pipeline. Each rule used in the pipeline may have a section in the configuration file. +to be used with the pipeline. Each rule used in the pipeline may have a section in the configuration file. 
Changelog @@ -154,10 +152,20 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +1.2.0 * convert packaging from setup.py to pyproject.toml (Poetry) + * add apptainer container for sequana_coverage rule + * add summary.html report with sample count and per-sample links 1.1.0 * set apptainer containers and use wrappers -1.0.0 * renamed into multicov. +1.0.0 * renamed into multicov * update to use latest sequana_pipetools (v0.9.2) 0.9.1 * rename genbank field into annotation, window into window_size 0.9.0 * first version ========= ==================================================================== + +Contribute & Code of Conduct +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To contribute to this project, please take a look at the +`Contributing Guidelines `_ first. Please note that this project is released with a +`Code of Conduct `_. By contributing to this project, you agree to abide by its terms. 
diff --git a/pyproject.toml b/pyproject.toml index 060d3d7..c6bc8f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,10 +34,10 @@ packages = [ [tool.poetry.dependencies] python = ">=3.8,<4.0" -sequana = ">=0.15.1" -sequana_pipetools = ">=0.12.4" -click-completion = "^0.5.2" -sequana-wrappers = "^26.3.20" +sequana = ">=0.20" +sequana_pipetools = ">=1.5.0" +click-completion = ">=0.5.2" +sequana-wrappers = ">=26.3.21" [tool.poetry.scripts] diff --git a/sequana_pipelines/multicov/__init__.py b/sequana_pipelines/multicov/__init__.py index d98707f..03dc782 100644 --- a/sequana_pipelines/multicov/__init__.py +++ b/sequana_pipelines/multicov/__init__.py @@ -1,6 +1,5 @@ -import pkg_resources try: - version = pkg_resources.require("sequana_fastqc")[0].version -except: - version = ">=0.8.0" - + from importlib.metadata import version + __version__ = version("sequana_multicov") +except Exception: + __version__ = ">=1.1.1" diff --git a/sequana_pipelines/multicov/config.yaml b/sequana_pipelines/multicov/config.yaml index 807668a..ca77497 100644 --- a/sequana_pipelines/multicov/config.yaml +++ b/sequana_pipelines/multicov/config.yaml @@ -35,6 +35,10 @@ input_pattern: # :param cnv_clustering: further clustering to merge detected events whose # distance is smaller than this parameter # +apptainers: + sequana_coverage: https://zenodo.org/record/18257162/files/sequana_tools_26.1.14.img + + sequana_coverage: circular: True window_size: 20001 diff --git a/sequana_pipelines/multicov/main.py b/sequana_pipelines/multicov/main.py index c5b68ef..809097f 100755 --- a/sequana_pipelines/multicov/main.py +++ b/sequana_pipelines/multicov/main.py @@ -3,9 +3,6 @@ # # Copyright (c) 2016-2021 - Sequana Development Team # -# File author(s): -# Thomas Cokelaer -# # Distributed under the terms of the 3-clause BSD license. # The full license is in the LICENSE file, distributed with this software. 
# @@ -13,138 +10,81 @@ # documentation: http://sequana.readthedocs.io # ############################################################################## -import shutil -import sys import os -import argparse import subprocess +import rich_click as click +import click_completion + +click_completion.init() + from sequana_pipetools.options import * -from sequana_pipetools.options import before_pipeline -from sequana_pipetools.misc import Colors -from sequana_pipetools.info import sequana_epilog, sequana_prolog from sequana_pipetools import SequanaManager -col = Colors() - NAME = "multicov" - -class Options(argparse.ArgumentParser): - def __init__(self, prog=NAME, epilog=None): - usage = col.purple(sequana_prolog.format(**{"name": NAME})) - super(Options, self).__init__( - usage=usage, - prog=prog, - description="", - epilog=epilog, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - # add a new group of options to the parser - so = SlurmOptions() - so.add_options(self) - - # add a snakemake group of options to the parser - so = SnakemakeOptions(working_directory=NAME) - so.add_options(self) - - so = InputOptions(input_pattern="*.bed") - so.add_options(self) - - so = GeneralOptions() - so.add_options(self) - - pipeline_group = self.add_argument_group("pipeline") - - pipeline_group.add_argument("-o", "--circular", action="store_true") - pipeline_group.add_argument("--double-threshold", default=0.5) - pipeline_group.add_argument("--genbank", default=None, - help="the genbank to annotate the events found") - pipeline_group.add_argument("--reference", default=None, - help="the genome reference used to plot GC content") - pipeline_group.add_argument("--high-threshold", default=4) - pipeline_group.add_argument("--low-threshold", default=-4) - pipeline_group.add_argument("--mixture-models", default=2, type=int, - help="""Number of models to use in the mixture model. (default 2). - No need to change this value. 
Possibly, you may want to set - to 1 or 3 in some rate occasions. """) - pipeline_group.add_argument("--window", default=20000, type=int, - help="""Length of the running median window. Keep to 20000 as much as - possible. This allows the detection of CNV up to 10kb. If longer - event are present, increase this window size.""") - pipeline_group.add_argument("--chunksize", default=5000000, type=int) - pipeline_group.add_argument("--binning", default=-1, type=int) - pipeline_group.add_argument("--cnv-clustering", default=-1) - - self.add_argument("--run", default=False, action="store_true", - help="execute the pipeline directly") - - def parse_args(self, *args): - args_list = list(*args) - if "--from-project" in args_list: - if len(args_list) > 2: - msg = ( - "WARNING [sequana]: With --from-project option, " - + "pipeline and data-related options will be ignored." - ) - print(col.error(msg)) - for action in self._actions: - if action.required is True: - action.required = False - options = super(Options, self).parse_args(*args) - return options - - -def main(args=None): - - if args is None: - args = sys.argv - - # whatever needs to be called by all pipeline before the options parsing - before_pipeline(NAME) - - # option parsing including common epilog - options = Options(NAME, epilog=sequana_epilog).parse_args(args[1:]) - - # the real stuff is here +help = init_click( + NAME, + groups={ + "Pipeline Specific": [ + "--annotation-file", + "--reference-file", + "--circular", + "--double-threshold", + "--high-threshold", + "--low-threshold", + "--mixture-models", + "--window", + "--chunksize", + "--binning", + "--cnv-clustering", + ], + }, +) + + +@click.command(context_settings=help) +@include_options_from(ClickSnakemakeOptions, working_directory=NAME) +@include_options_from(ClickSlurmOptions) +@include_options_from(ClickInputOptions, input_pattern="*.bed", add_input_readtag=False) +@include_options_from(ClickGeneralOptions) +@click.option("--annotation-file", 
default=None, help="Genbank file to annotate detected events") +@click.option("--reference-file", default=None, help="Genome reference FASTA file used to plot GC content") +@click.option("--circular", is_flag=True, help="Set if the genome is circular") +@click.option("--double-threshold", default=0.5, show_default=True, help="Double threshold for clustering") +@click.option("--high-threshold", default=4.0, show_default=True, help="High threshold for ROI detection") +@click.option("--low-threshold", default=-4.0, show_default=True, help="Low threshold for ROI detection") +@click.option( + "--mixture-models", + default=2, + show_default=True, + help="Number of mixture models. Set to 1 or 3 in rare occasions", +) +@click.option( + "--window", + default=20000, + show_default=True, + help="Running median window size. Keep at 20000 to detect CNV up to 10kb", +) +@click.option("--chunksize", default=5000000, show_default=True, help="Chunk size for large genomes") +@click.option("--binning", default=-1, show_default=True, help="Bin size for large genomes (-1 to disable)") +@click.option( + "--cnv-clustering", default=-1, show_default=True, help="Merge events closer than this distance (-1 to disable)" +) +def main(**options): manager = SequanaManager(options, NAME) + options = manager.options - # create the beginning of the command and the working directory manager.setup() - from sequana import logger - - logger.setLevel(options.level) - logger.name = "sequana_rnaseq" - logger.info(f"#Welcome to sequana_multicov pipeline.") - # fill the config file with input parameters if options.from_project is None: cfg = manager.config.config cfg.input_directory = os.path.abspath(options.input_directory) cfg.input_pattern = options.input_pattern - cfg.sequana_coverage.circular = options.circular cfg.sequana_coverage.double_threshold = options.double_threshold - - if options.genbank: - genbank = os.path.abspath(options.genbank) - cfg.sequana_coverage.genbank_file = genbank - if 
os.path.exists(genbank): - shutil.copy(genbank, manager.workdir) - else: - raise IOError("{} not found".format(options.genbank)) - - if options.reference: - reference = os.path.abspath(options.reference) - cfg.sequana_coverage.reference_file = reference - if os.path.exists(reference): - shutil.copy(reference, manager.workdir) - else: - raise IOError("{} not found".format(options.reference)) - cfg.sequana_coverage.high_threshold = options.high_threshold cfg.sequana_coverage.low_threshold = options.low_threshold cfg.sequana_coverage.mixture_models = options.mixture_models @@ -153,16 +93,20 @@ def main(args=None): cfg.sequana_coverage.binning = options.binning cfg.sequana_coverage.cnv_clustering = options.cnv_clustering + if options.annotation_file: + annotation = os.path.abspath(options.annotation_file) + if not os.path.exists(annotation): + raise IOError(f"{options.annotation_file} not found") + cfg.sequana_coverage.annotation_file = annotation + if options.reference_file: + reference = os.path.abspath(options.reference_file) + if not os.path.exists(reference): + raise IOError(f"{options.reference_file} not found") + cfg.sequana_coverage.reference_file = reference - - # finalise the command and save it; copy the snakemake. update the config - # file and save it. 
manager.teardown() - if options.run: - subprocess.Popen(["sh", "{}.sh".format(NAME)], cwd=options.workdir) - if __name__ == "__main__": main() diff --git a/sequana_pipelines/multicov/multicov.rules b/sequana_pipelines/multicov/multicov.rules index e9601c6..4d876cd 100644 --- a/sequana_pipelines/multicov/multicov.rules +++ b/sequana_pipelines/multicov/multicov.rules @@ -14,11 +14,12 @@ import sys import json +import pandas as pd + +from sequana.utils.datatables_js import DataTable from sequana_pipetools import PipelineManager from sequana_pipetools import snaketools as sm -sequana_wrapper_branch="main" - configfile: "config.yaml" manager = PipelineManager("multicov", config, fastq=False) @@ -60,8 +61,8 @@ rule multiqc: "multiqc/multiqc.log" resources: **config["multiqc"]["resources"] - wrapper: - f"{sequana_wrapper_branch}/wrappers/multiqc" + shell: + manager.get_shell("multiqc/run", "v1") rule sequana_coverage: @@ -70,6 +71,8 @@ rule sequana_coverage: fasta=config['sequana_coverage']['reference_file'] output: "{sample}/sequana_coverage/sequana_coverage.html" + log: + "{sample}/sequana_coverage/sequana_coverage.log" params: circular=config["sequana_coverage"]["circular"], window_size=config["sequana_coverage"]["window_size"], @@ -79,12 +82,66 @@ rule sequana_coverage: high_threshold=config["sequana_coverage"]["high_threshold"], low_threshold=config["sequana_coverage"]["low_threshold"], mixture_models=config["sequana_coverage"]["mixture_models"], - gbk=config["sequana_coverage"]["annotation_file"] - wrapper: - f"{sequana_wrapper_branch}/wrappers/sequana_coverage" + annotation=config["sequana_coverage"]["annotation_file"], + output_directory=lambda wildcards: f"{wildcards.sample}/sequana_coverage", + options="" + container: + config['apptainers']['sequana_coverage'] + shell: + manager.get_shell("sequana_coverage/run", "v1") onsuccess: - pass + + from sequana.modules_report.summary import SequanaReport + from sequana import logger + logger.setLevel("INFO") + + 
manager.teardown() + + # General info table + from sequana_pipelines.multicov import __version__ + df_general = pd.DataFrame({ + "samples": len(manager.samples), + "sequana_multicov_version": __version__, + }, index=["summary"]) + datatable = DataTable(df_general.T, "general", index=True) + datatable.datatable.datatable_options = { + "paging": "false", + "bFilter": "false", + "bInfo": "false", + "bSort": "false", + } + js_general = datatable.create_javascript_function() + html_general = datatable.create_datatable(style="width:30%") + + # Per-sample links table + df_samples = pd.DataFrame({ + "sample": list(manager.samples.keys()), + "link": [f"{s}/sequana_coverage/sequana_coverage.html" for s in manager.samples], + }) + datatable = DataTable(df_samples, "samples", index=False) + datatable.datatable.datatable_options = { + "paging": "false", + "buttons": ["copy", "csv"], + "bSort": "true", + "dom": "BRSPfrti", + } + datatable.datatable.set_links_to_column("link", "sample") + js_samples = datatable.create_javascript_function() + html_samples = datatable.create_datatable() + + contents = f""" +

General information

+ {js_general}{html_general} +
+

Coverage reports per sample

+ {js_samples}{html_samples} + """ + + data = manager.getmetadata() + SequanaReport(data, intro=contents) + + shell("chmod -R g+w .") onerror: diff --git a/sequana_pipelines/multicov/requirements.txt b/sequana_pipelines/multicov/tools.txt similarity index 100% rename from sequana_pipelines/multicov/requirements.txt rename to sequana_pipelines/multicov/tools.txt diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index a50706b..0000000 --- a/setup.cfg +++ /dev/null @@ -1,14 +0,0 @@ -# a setup configuration -[build_sphinx] -source_dir = doc/source -build_dir = doc/build -all_files = 1 - - -[aliases] -test=pytest - -[tool:pytest] -addopts= --durations=10 --verbose -n 1 --cov . --cov-report term-missing - - diff --git a/setup.py b/setup.py deleted file mode 100644 index 43ce4f8..0000000 --- a/setup.py +++ /dev/null @@ -1,70 +0,0 @@ -from setuptools import setup, find_namespace_packages - -_MAJOR = 1 -_MINOR = 1 -_MICRO = 1 -version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) -release = '%d.%d' % (_MAJOR, _MINOR) - -metainfo = { - 'authors': {"main": ("thomas cokelaer", "thomas.cokelaer@pasteur.fr")}, - 'version': version, - 'license' : 'new BSD', - 'url' : "https://github.com/sequana/", - 'description': "Parallelise version of sequana_coverage standalone application." 
, - 'platforms' : ['Linux', 'Unix', 'MacOsX', 'Windows'], - 'keywords' : ['coverage, snakemake, sequana, running median, CNV, depltion'], - 'classifiers' : [ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Education', - 'Intended Audience :: End Users/Desktop', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Topic :: Scientific/Engineering :: Information Analysis', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Physics'] - } - -NAME = "multicov" - -setup( - name = "sequana_{}".format(NAME), - version = version, - maintainer = metainfo['authors']['main'][0], - maintainer_email = metainfo['authors']['main'][1], - author = metainfo['authors']['main'][0], - author_email = metainfo['authors']['main'][1], - long_description = open("README.rst").read(), - keywords = metainfo['keywords'], - description = metainfo['description'], - license = metainfo['license'], - platforms = metainfo['platforms'], - url = metainfo['url'], - classifiers = metainfo['classifiers'], - - # package installation - packages = ["sequana_pipelines.multicov"], - - install_requires = open("requirements.txt").read(), - - # This is recursive include of data files - exclude_package_data = {"": ["__pycache__"]}, - package_data = { - '': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png", "*yml", "*smk"] - }, - - zip_safe=False, - - - entry_points = {'console_scripts':[ - 'sequana_multicov=sequana_pipelines.multicov.main:main'] - } - -) diff --git a/test/test_main.py b/test/test_main.py index 569f634..c4fd9c5 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ 
-9,53 +9,55 @@ annotation = f"{test_dir}/data/JB409847.gbk" reference = f"{test_dir}/data/JB409847.fa" + def test_standalone_subprocess(tmpdir): directory = tmpdir.mkdir("wkdir") cmd = "sequana_multicov --input-directory {} " - cmd += "--working-directory {} --force --annotation {} " - cmd += " --reference {} -o " + cmd += "--working-directory {} --force --annotation-file {} " + cmd += " --reference-file {} --circular " cmd = cmd.format(sharedir, directory, annotation, reference) subprocess.call(cmd.split()) + def test_standalone_script(tmpdir): directory = tmpdir.mkdir("wkdir") import sequana_pipelines.multicov.main as m - sys.argv = ["test", "--input-directory", sharedir, - "--force"] - m.main() + sys.argv = ["test", "--input-directory", sharedir, "--force"] + m.main(standalone_mode=False) + def test_wrong_reference(tmpdir): import sequana_pipelines.multicov.main as m directory = tmpdir.mkdir("wkdir") - sys.argv = ["test", "--input-directory", str(directory), - "--force", "--reference", "wrong"] + sys.argv = ["test", "--input-directory", str(directory), + "--force", "--reference-file", "wrong"] try: - m.main() + m.main(standalone_mode=False) assert False except IOError: assert True -def test_wrong_genbank(tmpdir): + +def test_wrong_annotation(tmpdir): directory = tmpdir.mkdir("wkdir") import sequana_pipelines.multicov.main as m - sys.argv = ["test", "--input-directory", str(directory), - "--force", "--genbank", "wrong"] + sys.argv = ["test", "--input-directory", str(directory), + "--force", "--annotation-file", "wrong"] try: - m.main() + m.main(standalone_mode=False) assert False except IOError: assert True + def test_check_output(tmpdir): wkdir = tmpdir.mkdir("wkdir") - # create the wokring directory and script + # create the working directory and script cmd = f"sequana_multicov --input-directory {test_dir}/data " - cmd += f"--working-directory {wkdir} --force --annotation {annotation} " - cmd += f" --reference {reference} -o " + cmd += f"--working-directory 
{wkdir} --force --annotation-file {annotation} " + cmd += f" --reference-file {reference} --circular " subprocess.call(cmd.split()) subprocess.call("sh multicov.sh".split(), cwd=wkdir) - - From c252a9bf197c30b4ba86bc0de518de18bb4d6690 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Sat, 21 Mar 2026 21:14:22 +0100 Subject: [PATCH 2/2] remove stringent pin --- .github/workflows/apptainer.yml | 4 ++-- .github/workflows/main.yml | 2 +- pyproject.toml | 8 ++++---- sequana_pipelines/multicov/multicov.rules | 5 +---- sequana_pipelines/multicov/schema.yaml | 2 ++ 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/apptainer.yml b/.github/workflows/apptainer.yml index d245ab5..0dd19e3 100644 --- a/.github/workflows/apptainer.yml +++ b/.github/workflows/apptainer.yml @@ -16,7 +16,7 @@ jobs: strategy: max-parallel: 5 matrix: - python: [3.8, 3.9, '3.10'] + python: ['3.11'] fail-fast: false @@ -52,6 +52,6 @@ jobs: - name: testing run: | - sequana_multicov --input-directory test/data/ --use-apptainer --reference test/data/JB409847.fa && cd multicov && sh multicov.sh + sequana_multicov --input-directory test/data/ --apptainer-prefix ~/images --reference-file test/data/JB409847.fa --input-pattern "*7.bed" && cd multicov && sh multicov.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 198f05d..f80dba3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: strategy: max-parallel: 5 matrix: - python: [3.8, 3.9, '3.10'] + python: ['3.9', '3.10', '3.11'] fail-fast: false diff --git a/pyproject.toml b/pyproject.toml index c6bc8f9..b05bb1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,9 +19,9 @@ classifiers = [ "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming 
Language :: Python :: 3.11", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Information Analysis", @@ -33,9 +33,9 @@ packages = [ [tool.poetry.dependencies] -python = ">=3.8,<4.0" -sequana = ">=0.20" -sequana_pipetools = ">=1.5.0" +python = ">=3.9,<4.0" +sequana = ">=0.17" +sequana_pipetools = ">=1.4.0" click-completion = ">=0.5.2" sequana-wrappers = ">=26.3.21" diff --git a/sequana_pipelines/multicov/multicov.rules b/sequana_pipelines/multicov/multicov.rules index 4d876cd..44afa84 100644 --- a/sequana_pipelines/multicov/multicov.rules +++ b/sequana_pipelines/multicov/multicov.rules @@ -143,9 +143,6 @@ onsuccess: shell("chmod -R g+w .") - onerror: - from sequana_pipetools.errors import PipeError - p = PipeError("multicov") - p.status() + manager.onerror() diff --git a/sequana_pipelines/multicov/schema.yaml b/sequana_pipelines/multicov/schema.yaml index 4423a2d..d247a2c 100644 --- a/sequana_pipelines/multicov/schema.yaml +++ b/sequana_pipelines/multicov/schema.yaml @@ -12,6 +12,8 @@ mapping: "input_pattern": type: str required: False + "apptainers": + type: any "sequana_coverage": type: map