From 781ba717e7ff5f7d08118b3a80dc080ef1800879 Mon Sep 17 00:00:00 2001 From: Ehsan ESTAJI <71376358+ehsanestaji@users.noreply.github.com> Date: Mon, 18 May 2026 20:33:37 +0200 Subject: [PATCH] chore: add adoption packaging starters --- .github/workflows/ci.yml | 8 +- .github/workflows/release.yml | 9 +- LICENSE | 21 ++++ README.md | 2 + docs/adoption-plan.md | 72 ++++++++++++ docs/benchmarking.md | 22 ++++ docs/packaging.md | 39 ++++++- docs/tool-landscape.md | 83 ++++++++++++++ integrations/multiqc/README.md | 29 +++++ integrations/multiqc/pyproject.toml | 20 ++++ .../src/fastaguard_multiqc/__init__.py | 13 +++ .../src/fastaguard_multiqc/multiqc_module.py | 103 ++++++++++++++++++ .../multiqc/src/fastaguard_multiqc/parser.py | 54 +++++++++ packaging/bioconda/README.md | 25 +++++ packaging/bioconda/build.sh | 11 ++ packaging/bioconda/meta.yaml | 45 ++++++++ tests/python/test_adoption_assets.py | 63 +++++++++++ 17 files changed, 614 insertions(+), 5 deletions(-) create mode 100644 LICENSE create mode 100644 docs/adoption-plan.md create mode 100644 docs/tool-landscape.md create mode 100644 integrations/multiqc/README.md create mode 100644 integrations/multiqc/pyproject.toml create mode 100644 integrations/multiqc/src/fastaguard_multiqc/__init__.py create mode 100644 integrations/multiqc/src/fastaguard_multiqc/multiqc_module.py create mode 100644 integrations/multiqc/src/fastaguard_multiqc/parser.py create mode 100644 packaging/bioconda/README.md create mode 100755 packaging/bioconda/build.sh create mode 100644 packaging/bioconda/meta.yaml create mode 100644 tests/python/test_adoption_assets.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 891cf3d..4e47b83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,8 @@ name: CI +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + on: push: branches: @@ -13,7 +16,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install Rust 
uses: dtolnay/rust-toolchain@stable @@ -32,6 +35,9 @@ jobs: - name: Run clippy run: cargo clippy --locked --all-targets --all-features -- -D warnings + - name: Check adoption assets + run: python3 -m unittest tests.python.test_adoption_assets -v + - name: Build Docker image run: docker build -t fastaguard:ci . diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7a14e46..1bae0ec 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,5 +1,8 @@ name: Release +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + on: push: tags: @@ -25,7 +28,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -45,7 +48,7 @@ jobs: run: scripts/package_release_artifact.sh "${{ matrix.target }}" "${GITHUB_REF_NAME}" - name: Upload artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: fastaguard-${{ github.ref_name }}-${{ matrix.target }} path: dist/*.tar.gz @@ -57,7 +60,7 @@ jobs: needs: build steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Download artifacts uses: actions/download-artifact@v5 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c2a3f14 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Ehsan Estaji + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 577dd09..15234fd 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,8 @@ FastaGuard catches FASTA-level assembly problems before expensive assembly QC. - [Product thesis](docs/product-thesis.md) - [MVP spec](docs/mvp-spec.md) - [Output contract](docs/output-contract.md) +- [Tool landscape](docs/tool-landscape.md) +- [Adoption plan](docs/adoption-plan.md) - [LLM and tooling vision](docs/llm-tooling-vision.md) - [Benchmarking](docs/benchmarking.md) - [Packaging](docs/packaging.md) diff --git a/docs/adoption-plan.md b/docs/adoption-plan.md new file mode 100644 index 0000000..3b6efc9 --- /dev/null +++ b/docs/adoption-plan.md @@ -0,0 +1,72 @@ +# Adoption Plan + +## Recommendation + +The next product phase should focus on installability and pipeline trust before +adding many new biological heuristics. + +Priority: + +```text +Bioconda -> BioContainers -> MultiQC plugin -> public benchmarks -> upstream workflow examples +``` + +## Phase 1: Package + +Goal: make installation natural for bioinformatics users. + +- Keep GitHub release binaries working. +- Keep Docker smoke tests passing. +- Replace the Bioconda recipe placeholder SHA256 once a public source archive exists. +- Submit `packaging/bioconda/` as `recipes/fastaguard/` to Bioconda. +- Let BioContainers build from the merged Bioconda recipe. + +Done when: + +```bash +conda install -c bioconda fastaguard +fastaguard --schema +``` + +works in a clean environment. 
+ +## Phase 2: Aggregate + +Goal: make FastaGuard visible in standard pipeline reports. + +- Continue emitting `fastaguard_mqc.json` custom content. +- Develop `integrations/multiqc/` into a packaged MultiQC plugin. +- Test the plugin against multiple sample reports. +- Decide whether to submit upstream to MultiQC once public adoption begins. + +Done when: + +```bash +multiqc . +``` + +shows FastaGuard verdicts and key metrics across many samples. + +## Phase 3: Prove + +Goal: show why FastaGuard is worth adding before expensive tools. + +- Benchmark public FASTA files. +- Capture examples of duplicate IDs, invalid symbols, high-N scaffolds, and suspicious composition. +- Document which findings should block downstream tools and which should only recommend deeper QC. +- Create a concise comparison against `seqkit stats`, QUAST, BUSCO, BlobToolKit, FastQC, and MultiQC. + +Done when the README can show real examples rather than only promises. + +## Phase 4: Expand + +Goal: add profiles once the assembly preflight contract is trusted. + +- transcriptome profile +- protein profile +- reference-panel profile +- compare mode for many FASTA files +- richer anomaly evidence +- LLM/tool-agent affordances on top of stable JSON and finding catalogs + +Avoid expanding profiles before packaging and benchmarks are credible. diff --git a/docs/benchmarking.md b/docs/benchmarking.md index e43ec8a..eca68b8 100644 --- a/docs/benchmarking.md +++ b/docs/benchmarking.md @@ -80,3 +80,25 @@ Use it to answer: - does the tool still behave well on large record counts? Do not use it to claim performance on contaminated assemblies, highly ambiguous assemblies, or compressed FASTA until separate fixtures cover those cases. 
+ +## Evidence To Collect Next + +Use release binaries and public assemblies to build a small evidence table for the README and release notes: + +- bacterial assembly around 5 Mbp +- fungal or small eukaryotic assembly around 30-50 Mbp +- large fragmented assembly with many contigs +- gzipped FASTA input +- intentionally problematic FASTA fixture with duplicate IDs and high-N scaffolds + +For each run, record: + +- FastaGuard version +- platform +- input size and sequence count +- elapsed seconds +- peak memory if measured externally +- verdict and top findings +- whether downstream tools would have been blocked or recommended + +This evidence matters more than synthetic speed alone because it shows the wedge: cheap FASTA preflight before expensive downstream QC. diff --git a/docs/packaging.md b/docs/packaging.md index 9518d8f..efec7ab 100644 --- a/docs/packaging.md +++ b/docs/packaging.md @@ -69,7 +69,13 @@ For the first public release: ## Bioconda -Bioconda should be added after the first public source archive is available. The recipe should expose one executable: +Bioconda should be submitted after the first public source archive is available. A starter recipe now lives in: + +```text +packaging/bioconda/ +``` + +The recipe should expose one executable: ```text fastaguard @@ -91,6 +97,14 @@ Do not block the MVP on Bioconda, but design for it now: - maintain stable exit codes - maintain a versioned JSON Schema +Important current blocker: the GitHub repository is private. Before upstream Bioconda submission, make the source archive public or move the final recipe to a public source URL and replace the placeholder SHA256 in `packaging/bioconda/meta.yaml`. + +Bioconda recipe guidance checked for this setup: + +- Bioconda hosts bioinformatics-specific packages. +- Rust dependencies should have license metadata bundled, so the starter recipe uses `cargo-bundle-licenses`. 
+- Tests in `meta.yaml` must rely only on runtime dependencies, so the starter tests use FastaGuard contract discovery commands. + ## Container Strategy The Docker image should stay boring: @@ -101,3 +115,26 @@ The Docker image should stay boring: - one entrypoint: `fastaguard` That makes it easy to run in Nextflow, Snakemake, Galaxy, and CI systems. + +Once the Bioconda recipe is merged upstream, BioContainers can build the corresponding container from the conda recipe. That path is preferable to maintaining a separate BioContainers Dockerfile unless Bioconda packaging proves impossible. + +## MultiQC + +FastaGuard v0.1.0 emits MultiQC custom content as `fastaguard_mqc.json`. + +A native MultiQC plugin starter now lives in: + +```text +integrations/multiqc/ +``` + +Local development: + +```bash +cd integrations/multiqc +python -m pip install -e . +cd ../../examples/reports +multiqc . +``` + +This is intentionally compact: it parses `fastaguard_mqc.json`, adds key metrics to MultiQC general stats, and adds a FastaGuard summary section. The full evidence remains in FastaGuard's own HTML and JSON reports. diff --git a/docs/tool-landscape.md b/docs/tool-landscape.md new file mode 100644 index 0000000..6a3788c --- /dev/null +++ b/docs/tool-landscape.md @@ -0,0 +1,83 @@ +# Tool Landscape + +## Positioning + +FastaGuard should not compete with established downstream tools. It should make +their inputs safer and easier to triage. + +Recommended slogan: + +```text +Run FastaGuard first. +``` + +Long-form positioning: + +```text +The FASTA preflight QC layer for modern bioinformatics pipelines. 
+``` + +## Where FastaGuard Fits + +| Tool | Primary role | When it runs | What FastaGuard adds before it | +| --- | --- | --- | --- | +| FastQC | Raw read QC | Before assembly or mapping | FastaGuard targets FASTA assemblies/references, not read files | +| seqkit | General sequence toolkit | Any ad hoc sequence operation | FastaGuard turns common FASTA checks into one opinionated QC contract | +| QUAST | Assembly quality evaluation | After assembly | FastaGuard catches structural FASTA problems before assembly QC | +| BUSCO | Completeness assessment | After assembly/transcriptome/protein prediction | FastaGuard checks parseability and composition before biological completeness | +| BlobToolKit | Contamination/cobiont exploration | After assembly and supporting evidence | FastaGuard flags FASTA-level anomalies before taxonomy workflows | +| MultiQC | Report aggregation | End of pipelines | FastaGuard emits data MultiQC can aggregate | +| Custom scripts | Pipeline-specific checks | Anywhere | FastaGuard replaces fragile repeated scripts with a versioned schema | + +## The Gap + +Without FastaGuard, users typically combine several partial checks: + +- run `seqkit stats` for counts and lengths +- run custom scripts for duplicate IDs or invalid symbols +- run QUAST for assembly metrics +- run BUSCO for biological completeness +- run BlobToolKit or taxonomy tooling for contamination exploration +- rely on pipeline-specific assumptions for exit codes and report parsing + +That works, but it is fragmented. The missing layer is a default, explainable, +machine-readable FASTA preflight contract. + +## Product Evidence We Have + +Current v0.1.0 evidence: + +- Rust CLI builds and runs as a single binary. +- Docker build and smoke test pass. +- GitHub release workflow builds Linux and macOS binaries. +- JSON Schema validates committed golden reports. +- Reports include bounded evidence records and suggested actions. 
+- MultiQC custom-content JSON is emitted as `fastaguard_mqc.json`. +- A native MultiQC plugin starter exists under `integrations/multiqc/`. +- Bioconda recipe scaffolding exists under `packaging/bioconda/`. +- nf-core, Nextflow, and Snakemake starters exist under `examples/`. + +Evidence still needed: + +- benchmarks on public assemblies +- user feedback from real pipeline authors +- Bioconda/BioContainers availability +- official MultiQC module or packaged plugin +- comparison examples showing what FastaGuard catches before QUAST/BUSCO/BlobToolKit + +## Message Discipline + +Say: + +```text +FastaGuard catches FASTA-level problems before expensive downstream QC. +``` + +Do not say: + +```text +FastQC for FASTA. +``` + +That phrase is tempting, but it hides the more important product idea: +FastaGuard is a pipeline-native preflight contract, not just a report. diff --git a/integrations/multiqc/README.md b/integrations/multiqc/README.md new file mode 100644 index 0000000..1d7a0d9 --- /dev/null +++ b/integrations/multiqc/README.md @@ -0,0 +1,29 @@ +# MultiQC FastaGuard Module Starter + +This directory contains a dedicated MultiQC plugin starter for FastaGuard. + +FastaGuard already emits MultiQC custom-content JSON as `fastaguard_mqc.json`. +This plugin is the next step: a native module that can add FastaGuard verdicts +and key assembly preflight metrics directly to MultiQC reports. + +## Local Install + +From this directory: + +```bash +python -m pip install -e . +cd path/to/fastaguard/results +multiqc . +``` + +The plugin looks for `*fastaguard_mqc.json` files and reads the same custom +content contract emitted by the CLI. + +## Current Scope + +- Parse FastaGuard custom-content JSON. +- Add verdict and summary metrics to the MultiQC general stats table. +- Add one FastaGuard summary table section. + +Keep the module compact. MultiQC should summarize many FastaGuard reports, not +replicate every field from the full FastaGuard HTML report. 
def __getattr__(name):
    """Resolve ``MultiqcModule`` lazily on first attribute access (PEP 562).

    ``.multiqc_module`` is imported only when ``MultiqcModule`` is actually
    requested, so importing this package for the parser helpers alone does
    not pull that submodule (and its ``multiqc`` imports) in up front.

    Args:
        name: Attribute name that was not found on the package by normal
            lookup.

    Returns:
        The ``MultiqcModule`` class when ``name == "MultiqcModule"``.

    Raises:
        AttributeError: For every other name, using the same wording Python
            itself uses for a missing module attribute.
    """
    if name != "MultiqcModule":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred on purpose: see the docstring.
    from .multiqc_module import MultiqcModule

    return MultiqcModule
@staticmethod
def _load_reports(root: Path) -> dict[str, dict]:
    """Merge the summaries of all FastaGuard custom-content files under *root*.

    Candidate files come from ``find_custom_content_files``; each one is
    parsed with ``load_custom_content_summary``. Later files overwrite
    earlier ones when they report the same sample name.

    Returns:
        Mapping of sample name to its summary row; empty when nothing parsed.
    """
    merged: dict[str, dict] = {}
    for candidate in find_custom_content_files(root):
        try:
            summary = load_custom_content_summary(candidate)
        except ValueError:
            # The glob can match files that are not FastaGuard reports;
            # those are skipped rather than aborting the module.
            continue
        merged.update(summary)
    return merged

@staticmethod
def _statuses(data_by_sample: dict[str, dict]) -> dict[str, list[str]]:
    """Group sample names by verdict for the section's ``statuses`` argument.

    The verdict is compared case-insensitively against ``pass``/``warn``/
    ``fail``; samples with any other (or missing) verdict are left out.
    """
    buckets = {"pass": [], "warn": [], "fail": []}
    for sample_name, row in data_by_sample.items():
        bucket = buckets.get(str(row.get("verdict", "")).lower())
        if bucket is not None:
            bucket.append(sample_name)
    return buckets

@staticmethod
def _general_stats_data(data_by_sample: dict[str, dict]) -> dict[str, dict]:
    """Reduce each sample row to the columns shown in general stats.

    Only ``finding_count``, ``n50`` and ``n_percent`` are kept, and a field
    is dropped for a sample when its value is ``None``.
    """
    visible_fields = ("finding_count", "n50", "n_percent")
    stats: dict[str, dict] = {}
    for sample_name, row in data_by_sample.items():
        shown = {}
        for field in visible_fields:
            value = row.get(field)
            if value is not None:
                shown[field] = value
        stats[sample_name] = shown
    return stats
# Fields copied from each sample row of the custom-content table; a field
# that is absent from a row is reported as ``None``.
SUMMARY_FIELDS = (
    "verdict",
    "sequence_count",
    "total_length",
    "n50",
    "n90",
    "gc_percent",
    "n_percent",
    "finding_count",
)


def load_custom_content_summary(path: str | Path) -> dict[str, dict[str, Any]]:
    """Load one FastaGuard MultiQC custom-content JSON file.

    Args:
        path: Location of a ``fastaguard_mqc.json``-style file.

    Returns:
        Mapping of sample name (coerced to ``str``) to a dict holding exactly
        the keys in ``SUMMARY_FIELDS``; missing fields map to ``None``.

    Raises:
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``), the top-level value is not an object, the
            ``id`` is not ``"fastaguard"``, the ``plot_type`` is not
            ``"table"``, ``data`` is missing/empty/not an object, or a
            sample row is not an object.
        OSError: If the file cannot be read.
    """
    report_path = Path(path)
    # Decode explicitly as UTF-8 instead of the platform's locale encoding.
    payload = json.loads(report_path.read_text(encoding="utf-8"))

    # Valid JSON may be a list/number/string; without this guard the
    # ``.get`` calls below would raise AttributeError, which callers that
    # only catch ValueError for "not one of ours" would not handle.
    if not isinstance(payload, dict):
        raise ValueError(f"{report_path} is not a FastaGuard MultiQC custom-content file")
    if payload.get("id") != "fastaguard":
        raise ValueError(f"{report_path} is not a FastaGuard MultiQC custom-content file")
    if payload.get("plot_type") != "table":
        raise ValueError(f"{report_path} is not a FastaGuard table custom-content file")

    data = payload.get("data")
    if not isinstance(data, dict) or not data:
        raise ValueError(f"{report_path} has no FastaGuard sample data")

    parsed: dict[str, dict[str, Any]] = {}
    for sample_name, row in data.items():
        if not isinstance(row, dict):
            raise ValueError(f"{report_path} sample {sample_name!r} is not a table row")
        parsed[str(sample_name)] = {field: row.get(field) for field in SUMMARY_FIELDS}

    return parsed


def find_custom_content_files(root: str | Path) -> list[Path]:
    """Find likely FastaGuard MultiQC custom-content files below root.

    Searches *root* recursively for ``*fastaguard_mqc.json`` and
    ``*fastaguard*.mqc.json``. A file matching both patterns is returned
    once, and only regular files are kept.

    Returns:
        The matching paths in sorted order.
    """
    search_root = Path(root)
    patterns = ("*fastaguard_mqc.json", "*fastaguard*.mqc.json")
    # A set removes duplicates when one file satisfies both patterns.
    candidates = {
        path
        for pattern in patterns
        for path in search_root.rglob(pattern)
        if path.is_file()
    }
    return sorted(candidates)
+rm -f "${PREFIX}/.crates.toml" "${PREFIX}/.crates2.json" + +install -Dm644 schema/fastaguard.schema.json \ + "${PREFIX}/share/${PKG_NAME}/schema/fastaguard.schema.json" +install -Dm644 schema/finding-catalog.json \ + "${PREFIX}/share/${PKG_NAME}/schema/finding-catalog.json" diff --git a/packaging/bioconda/meta.yaml b/packaging/bioconda/meta.yaml new file mode 100644 index 0000000..f505ff0 --- /dev/null +++ b/packaging/bioconda/meta.yaml @@ -0,0 +1,45 @@ +{% set name = "fastaguard" %} +{% set version = "0.1.0" %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + url: https://github.com/ehsanestaji/FastaGuard/archive/refs/tags/v{{ version }}.tar.gz + sha256: REPLACE_WITH_PUBLIC_SOURCE_ARCHIVE_SHA256 + +build: + number: 0 + +requirements: + build: + - {{ compiler('rust') }} + - {{ compiler('c') }} + - cargo-bundle-licenses + host: + - zlib + run: + - zlib + +test: + commands: + - fastaguard --help + - fastaguard --schema + - fastaguard --finding-catalog + +about: + home: https://github.com/ehsanestaji/FastaGuard + summary: FASTA preflight QC for assembly pipelines + description: | + FastaGuard validates assembly FASTA files, detects structural and + composition red flags, and emits pipeline-ready JSON, TSV, HTML, and + MultiQC outputs before downstream analysis. 
class AdoptionAssetsTest(unittest.TestCase):
    """Checks for the MultiQC plugin starter and the Bioconda recipe starter.

    The parser tests run the real parser; the packaging tests only check
    that the committed text files still contain the declarations the
    starters rely on.
    """

    def test_multiqc_parser_reads_fastaguard_custom_content(self):
        """The committed example report parses into the expected summary row."""
        fixture = ROOT / "examples" / "reports" / "assembly_pass" / "fastaguard_mqc.json"

        summary = load_custom_content_summary(fixture)

        self.assertEqual(set(summary), {"valid_assembly"})
        self.assertEqual(summary["valid_assembly"]["verdict"], "PASS")
        self.assertEqual(summary["valid_assembly"]["sequence_count"], 3)
        self.assertEqual(summary["valid_assembly"]["n50"], 16)

    def test_multiqc_parser_rejects_non_fastaguard_custom_content(self):
        """Custom content written by another tool is rejected with ValueError."""
        with TemporaryDirectory() as temp_dir:
            path = Path(temp_dir) / "other_mqc.json"
            path.write_text(
                json.dumps(
                    {
                        "id": "other_tool",
                        "plot_type": "table",
                        "data": {"sample": {"verdict": "PASS"}},
                    }
                ),
                encoding="utf-8",
            )

            with self.assertRaisesRegex(ValueError, "not a FastaGuard"):
                load_custom_content_summary(path)

    def test_multiqc_plugin_declares_module_entry_point(self):
        """pyproject.toml registers the module entry point and depends on MultiQC."""
        pyproject = (ROOT / "integrations" / "multiqc" / "pyproject.toml").read_text(
            encoding="utf-8"
        )

        self.assertIn('[project.entry-points."multiqc.modules.v1"]', pyproject)
        self.assertIn('fastaguard = "fastaguard_multiqc:MultiqcModule"', pyproject)
        # A bare "multiqc" substring is already implied by the entry-point
        # lines above, so check the dependency declaration itself.
        self.assertIn('"multiqc>=', pyproject)

    def test_bioconda_recipe_declares_binary_and_contract_tests(self):
        """The recipe builds with Rust and runs the three contract commands."""
        bioconda = ROOT / "packaging" / "bioconda"
        recipe = (bioconda / "meta.yaml").read_text(encoding="utf-8")
        build = (bioconda / "build.sh").read_text(encoding="utf-8")

        self.assertIn('{% set name = "fastaguard" %}', recipe)
        self.assertIn("{{ compiler('rust') }}", recipe)
        self.assertIn("cargo-bundle-licenses", recipe)
        self.assertIn("fastaguard --help", recipe)
        self.assertIn("fastaguard --schema", recipe)
        self.assertIn("fastaguard --finding-catalog", recipe)
        self.assertIn("cargo install", build)
        self.assertIn("--no-track", build)