diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b332faf4..49b7b620 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,7 @@ name: Tests and Lints on: push: + pull_request: env: RUSTFLAGS: "-Dwarnings" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 00000000..50c689d0 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,153 @@ +name: Build (and publish) python wheels + +on: + push: + branches: + - main + - '*python*' + tags: + - '*' + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + linux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-22.04 + target: x86_64 + - runner: ubuntu-22.04 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python_bindings + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.platform.target }} + path: python_bindings/dist + + windows: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + architecture: ${{ matrix.platform.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python_bindings + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.platform.target }} + path: python_bindings/dist + + macos: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-13 + target: x86_64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python_bindings + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.platform.target }} + path: python_bindings/dist + + sources: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + working-directory: python_bindings + command: sdist + args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: python_bindings/dist + + release: + name: Release + if: ${{ startsWith(github.ref, 'refs/tags/python_bindings-v') }} + runs-on: ubuntu-latest + needs: [linux, windows, macos, sources] + permissions: + id-token: write + contents: write + attestations: write + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Check Versions + id: versions + shell: bash + run: | + tag="${GITHUB_REF##*/}" + tag_version="${tag#*-v}" + cargo_version=$(grep '^version' python_bindings/Cargo.toml | head -1 | sed 's/version = "\(.*\)"/\1/') + if [ "$cargo_version" != "$tag_version" ]; then + echo "Versions mismatch: tag specifies $tag_version, but crate has version $cargo_version" + exit 1 + fi + - name: Get artifacts + uses: actions/download-artifact@v4 + - name: Generate artifact attestation + uses: actions/attest-build-provenance@v2 + with: + subject-path: 'wheels-*/*' + - name: Publish to PyPI + if: ${{ startsWith(github.ref, 'refs/tags/') }} + env: + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + pip install twine + twine upload \ + --non-interactive \ + --username __token__ \ + --repository pypi \ + wheels-*/* diff --git a/Cargo.lock b/Cargo.lock index 56892b2c..ddb533ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,9 +60,15 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "arrayref" @@ -123,9 +129,9 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" [[package]] name = "byteorder-lite" @@ -141,9 +147,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.34" +version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff" +checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" dependencies = [ "clap_builder", "clap_derive", @@ -151,9 +157,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.34" +version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489" +checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" dependencies = [ "anstream", "anstyle", @@ -193,9 +199,9 @@ checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "compact-genome" -version = "12.2.0" +version = "12.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f512b52ff4bfc23a1544c203b11c1fa6ae30edc9bde2e58806ff63ad8a4987d" +checksum = "503cda65a29118e687d71d64d7a3c1e4d21f235a3aa55b22cd2902a99d04d9ff" dependencies = [ "bitvec", "enum-iterator", @@ -239,9 +245,9 @@ checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" [[package]] name = "deranged" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" dependencies = [ "powerfmt", ] @@ -310,9 +316,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -376,9 +382,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] name = "heck" @@ -404,14 +410,20 @@ checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285" [[package]] name = "indexmap" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", ] +[[package]] +name = "indoc" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -435,9 +447,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "kurbo" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89234b2cc610a7dd927ebde6b41dd1a5d4214cffaef4cf1fb2195d592f92518f" +checksum = "1077d333efea6170d9ccb96d3c3026f300ca0773da4938cc4c811daa6df68b0c" dependencies = [ "arrayvec", "smallvec", @@ -486,15 +498,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "log" @@ -527,11 +539,20 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", "simd-adler32", @@ -616,9 +637,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.1" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "pico-args" @@ -662,13 +683,109 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] +[[package]] +name = "py_lib_tsalign" +version = "0.1.0" +dependencies = [ + "lib_tsalign", + "lib_tsshow", + "pyo3", + "pyo3-log", + "pythonize", + "serde", +] + +[[package]] +name = "pyo3" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-log" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079e412e909af5d6be7c04a7f29f6a2837a080410e1c529c9dee2c367383db4" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + +[[package]] +name = "pyo3-macros" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "pythonize" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5bcac0d0b71821f0d69e42654f1e15e5c94b85196446c4de9588951a2117e7b" +dependencies = [ + "pyo3", + "serde", +] + [[package]] name = "quick-error" version = "2.0.1" @@ -718,9 +835,9 @@ dependencies = [ [[package]] name = "resvg" -version = "0.45.0" +version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd43d1c474e9dadf09a8fdf22d713ba668b499b5117b9b9079500224e26b5b29" +checksum = "a8928798c0a55e03c9ca6c4c6846f76377427d2c1e1f7e6de3c06ae57942df43" dependencies = [ "gif", "image-webp", @@ -848,9 +965,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "strict-num" @@ -905,9 +1022,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.100" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -926,6 +1043,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" + [[package]] name = "termcolor" version = "1.4.1" @@ -1031,9 +1154,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.8.20" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", @@ -1043,26 +1166,33 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.24" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" + [[package]] name = "traitsequence" version = "8.1.2" @@ -1145,11 +1275,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1d386ff53b415b7fe27b50bb44679e2cc4660272694b7b6f3326d8480823a94" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "usvg" -version = "0.45.0" +version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ac8e0e3e4696253dc06167990b3fe9a2668ab66270adf949a464db4088cb354" +checksum = "80be9b06fbae3b8b303400ab20778c80bbaf338f563afe567cf3c9eea17b47ef" dependencies = [ "base64", "data-url", @@ -1274,9 +1410,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.4" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index dd4679f5..5cb11843 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ members = [ "tsalign", "lib_tsshow", + # Bindings + "python_bindings", + # Internal "tsalign-tests", ] @@ -19,7 +22,7 @@ package.repository = "https://github.com/sebschmi/template-switch-aligner" [workspace.dependencies] serde = "1.0.219" -compact-genome = "12.2.0" +compact-genome = "12.3.0" traitsequence = "8.1.2" log = "0.4.27" num-traits = "0.2.19" diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 203a44dd..70442efc 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -19,6 +19,7 @@ use crate::config; pub mod alignment_geometry; pub mod alignment_result; +pub mod configurable_a_star_align; pub mod gap_affine_edit_distance; pub mod template_switch_distance; #[cfg(test)] diff --git a/lib_tsalign/src/a_star_aligner/alignment_geometry.rs b/lib_tsalign/src/a_star_aligner/alignment_geometry.rs index eae23084..8d315b01 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_geometry.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_geometry.rs @@ -1,12 +1,16 @@ use std::{fmt::Display, ops::Range}; -#[derive(Debug, Clone, Eq, PartialEq)] +use serde::Deserialize; + +#[derive(Debug, Clone, Eq, PartialEq, Deserialize)] +#[serde(rename_all = "snake_case")] pub struct AlignmentRange { offset: AlignmentCoordinates, limit: AlignmentCoordinates, } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize)] +#[serde(rename_all = "snake_case")] pub struct AlignmentCoordinates { reference: usize, query: usize, diff --git a/lib_tsalign/src/a_star_aligner/configurable_a_star_align.rs b/lib_tsalign/src/a_star_aligner/configurable_a_star_align.rs new file mode 100644 index 00000000..ec199db6 --- /dev/null +++ b/lib_tsalign/src/a_star_aligner/configurable_a_star_align.rs @@ -0,0 +1,337 @@ +use std::fmt::Debug; + +use compact_genome::{ + implementation::{ + alphabets::{ + dna_alphabet::DnaAlphabet, dna_alphabet_or_n::DnaAlphabetOrN, + dna_iupac_nucleic_acid_alphabet::DnaIupacNucleicAcidAlphabet, + rna_alphabet::RnaAlphabet, rna_alphabet_or_n::RnaAlphabetOrN, + rna_iupac_nucleic_acid_alphabet::RnaIupacNucleicAcidAlphabet, + }, + vec_sequence::VectorGenome, + }, + interface::{ + alphabet::Alphabet, + sequence::{GenomeSequence, OwnedGenomeSequence}, + }, +}; +use generic_a_star::cost::U64Cost; +use serde::Deserialize; + +use crate::{ + a_star_aligner::{ + template_switch_distance::strategies::{ + AlignmentStrategySelection, primary_range::NoPrunePrimaryRangeStrategy, + secondary_deletion::AllowSecondaryDeletionStrategy, shortcut::NoShortcutStrategy, + }, + template_switch_distance_a_star_align, + }, + config::TemplateSwitchConfig, +}; + +use super::{ + alignment_geometry::AlignmentRange, + alignment_result::AlignmentResult, + template_switch_distance::{ + AlignmentType, + strategies::{ + chaining::{ + ChainingStrategy, LowerBoundChainingStrategy, NoChainingStrategy, + PrecomputeOnlyChainingStrategy, + }, + node_ord::{AntiDiagonalNodeOrdStrategy, CostOnlyNodeOrdStrategy, NodeOrdStrategy}, + primary_match::AllowPrimaryMatchStrategy, + template_switch_count::{ + MaxTemplateSwitchCountStrategy, NoTemplateSwitchCountStrategy, + TemplateSwitchCountStrategy, + }, + template_switch_min_length::{ + LookaheadTemplateSwitchMinLengthStrategy, NoTemplateSwitchMinLengthStrategy, + TemplateSwitchMinLengthStrategy, + }, + }, + }, +}; + +// TODO to be discussed +// TODO more ergonomic way to only adjust some cost values ... +/// Default costs for a star alignment, given in the custom `.tsa` format +const DEFAULT_COSTS: &str = include_str!("../../../sample_tsa_config/config.tsa"); + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case", default)] +pub struct Config { + pub alphabet: InputAlphabet, + pub reference_name: String, + pub query_name: String, + /// Costs specification in plain text format. + /// + /// This is the same format that the `.tsa` config files use. + pub costs: String, + + pub node_ord_strategy: NodeOrdStrategySelector, + pub min_length_strategy: MinLengthStrategySelector, + pub chaining_strategy: ChainingStrategySelector, + pub no_ts: bool, + + pub cost_limit: Option, + /// Approximate memory limit in bytes. + pub memory_limit: Option, + pub range: Option, +} + +impl Default for Config { + fn default() -> Self { + Self { + alphabet: InputAlphabet::DnaN, + reference_name: "reference".to_owned(), + query_name: "query".to_owned(), + costs: DEFAULT_COSTS.to_owned(), + node_ord_strategy: NodeOrdStrategySelector::AntiDiagonal, + min_length_strategy: MinLengthStrategySelector::Lookahead, + chaining_strategy: ChainingStrategySelector::None, + no_ts: false, + cost_limit: None, + memory_limit: None, + range: None, + } + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum InputAlphabet { + Dna, + DnaN, + Rna, + RnaN, + DnaIupac, + RnaIupac, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum NodeOrdStrategySelector { + CostOnly, + AntiDiagonal, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum MinLengthStrategySelector { + None, + Lookahead, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ChainingStrategySelector { + None, + PrecomputeOnly, + LowerBound, +} + +/// Align `query` to `reference` with the given `config`. +/// +/// `query` and `reference` must be ASCII strings restricted to the characters specified by `config.alphabet`. +pub fn a_star_align( + reference: &[u8], + query: &[u8], + config: &Config, +) -> AlignmentResult { + match config.alphabet { + InputAlphabet::Dna => { + a_star_align_select_node_ord_strategy::(reference, query, config) + } + InputAlphabet::DnaN => { + a_star_align_select_node_ord_strategy::(reference, query, config) + } + InputAlphabet::Rna => { + a_star_align_select_node_ord_strategy::(reference, query, config) + } + InputAlphabet::RnaN => { + a_star_align_select_node_ord_strategy::(reference, query, config) + } + InputAlphabet::DnaIupac => a_star_align_select_node_ord_strategy::< + DnaIupacNucleicAcidAlphabet, + >(reference, query, config), + InputAlphabet::RnaIupac => a_star_align_select_node_ord_strategy::< + RnaIupacNucleicAcidAlphabet, + >(reference, query, config), + } +} + +fn a_star_align_select_node_ord_strategy( + reference: &[u8], + query: &[u8], + config: &Config, +) -> AlignmentResult { + let reference = VectorGenome::::from_slice_u8(reference).unwrap(); + let query = VectorGenome::from_slice_u8(query).unwrap(); + let costs = TemplateSwitchConfig::read_plain(config.costs.as_bytes()).unwrap(); + + match config.node_ord_strategy { + NodeOrdStrategySelector::CostOnly => { + a_star_align_select_template_switch_min_length_strategy::<_, _, CostOnlyNodeOrdStrategy>( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + config, + costs, + ) + } + NodeOrdStrategySelector::AntiDiagonal => { + a_star_align_select_template_switch_min_length_strategy::< + _, + _, + AntiDiagonalNodeOrdStrategy, + >( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + config, + costs, + ) + } + } +} + +fn a_star_align_select_template_switch_min_length_strategy< + AlphabetType: Alphabet + Debug + Clone + Eq, + SubsequenceType: GenomeSequence + ?Sized, + NodeOrd: NodeOrdStrategy, +>( + reference: &SubsequenceType, + query: &SubsequenceType, + config: &Config, + costs: TemplateSwitchConfig, +) -> AlignmentResult { + match config.min_length_strategy { + MinLengthStrategySelector::None => align_a_star_template_switch_select_chaining_strategy::< + _, + _, + NodeOrd, + NoTemplateSwitchMinLengthStrategy, + >(reference, query, config, costs), + MinLengthStrategySelector::Lookahead => { + align_a_star_template_switch_select_chaining_strategy::< + _, + _, + NodeOrd, + LookaheadTemplateSwitchMinLengthStrategy, + >(reference, query, config, costs) + } + } +} + +fn align_a_star_template_switch_select_chaining_strategy< + AlphabetType: Alphabet + Debug + Clone + Eq, + SubsequenceType: GenomeSequence + ?Sized, + NodeOrd: NodeOrdStrategy, + TemplateSwitchMinLength: TemplateSwitchMinLengthStrategy, +>( + reference: &SubsequenceType, + query: &SubsequenceType, + config: &Config, + costs: TemplateSwitchConfig, +) -> AlignmentResult { + match config.chaining_strategy { + ChainingStrategySelector::None => align_a_star_template_switch_select_no_ts_strategy::< + _, + _, + NodeOrd, + TemplateSwitchMinLength, + NoChainingStrategy, + >(reference, query, config, costs), + ChainingStrategySelector::PrecomputeOnly => { + align_a_star_template_switch_select_no_ts_strategy::< + _, + _, + NodeOrd, + TemplateSwitchMinLength, + PrecomputeOnlyChainingStrategy, + >(reference, query, config, costs) + } + ChainingStrategySelector::LowerBound => { + align_a_star_template_switch_select_no_ts_strategy::< + _, + _, + NodeOrd, + TemplateSwitchMinLength, + LowerBoundChainingStrategy, + >(reference, query, config, costs) + } + } +} + +fn align_a_star_template_switch_select_no_ts_strategy< + AlphabetType: Alphabet + Debug + Clone + Eq, + SubsequenceType: GenomeSequence + ?Sized, + NodeOrd: NodeOrdStrategy, + TemplateSwitchMinLength: TemplateSwitchMinLengthStrategy, + Chaining: ChainingStrategy, +>( + reference: &SubsequenceType, + query: &SubsequenceType, + config: &Config, + costs: TemplateSwitchConfig, +) -> AlignmentResult { + if config.no_ts { + align_a_star_template_switch_distance_call::< + _, + _, + NodeOrd, + TemplateSwitchMinLength, + Chaining, + MaxTemplateSwitchCountStrategy, + >(reference, query, config, costs, 0) + } else { + align_a_star_template_switch_distance_call::< + _, + _, + NodeOrd, + TemplateSwitchMinLength, + Chaining, + NoTemplateSwitchCountStrategy, + >(reference, query, config, costs, ()) + } +} + +fn align_a_star_template_switch_distance_call< + AlphabetType: Alphabet + Debug + Clone + Eq, + SubsequenceType: GenomeSequence + ?Sized, + NodeOrd: NodeOrdStrategy, + TemplateSwitchMinLength: TemplateSwitchMinLengthStrategy, + Chaining: ChainingStrategy, + TemplateSwitchCount: TemplateSwitchCountStrategy, +>( + reference: &SubsequenceType, + query: &SubsequenceType, + config: &Config, + costs: TemplateSwitchConfig, + template_switch_count_memory: ::Memory, +) -> AlignmentResult { + template_switch_distance_a_star_align::< + AlignmentStrategySelection< + AlphabetType, + U64Cost, + NodeOrd, + TemplateSwitchMinLength, + Chaining, + TemplateSwitchCount, + AllowSecondaryDeletionStrategy, + NoShortcutStrategy, + AllowPrimaryMatchStrategy, + NoPrunePrimaryRangeStrategy, + >, + _, + >( + reference, + query, + &config.reference_name, + &config.query_name, + config.range.clone(), + costs, + config.cost_limit, + config.memory_limit, + template_switch_count_memory, + ) +} diff --git a/python_bindings/.gitignore b/python_bindings/.gitignore new file mode 100644 index 00000000..c8f04429 --- /dev/null +++ b/python_bindings/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/python_bindings/Cargo.toml b/python_bindings/Cargo.toml new file mode 100644 index 00000000..d62fb41c --- /dev/null +++ b/python_bindings/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "py_lib_tsalign" +description = "Python Bindings for `lib_tsalign`" +license.workspace = true +authors = ["Sebastian Schmidt ", "Jasper Krauter "] +version = "0.1.0" +edition.workspace = true +rust-version.workspace = true +repository.workspace = true + +[lib] +name = "tsalign" +doc = false +crate-type = ["cdylib"] + +[dependencies] +pyo3 = "0.24.0" +lib_tsalign = { version = "0.15.0", path = "../lib_tsalign", features = ["serde"] } +lib_tsshow = { version = "0.15.0", path = "../lib_tsshow" } +serde.workspace = true +pythonize = "0.24.0" +pyo3-log = "0.12.3" diff --git a/python_bindings/README.md b/python_bindings/README.md new file mode 100644 index 00000000..96563830 --- /dev/null +++ b/python_bindings/README.md @@ -0,0 +1,10 @@ +# Python Bindings for [`lib_tsalign`](https://crates.io/crates/lib_tsalign) + +[![PyPI](https://img.shields.io/pypi/v/tsalign)](https://pypi.org/project/tsalign/) + +These bindings are still very minimal and are subject to improvement and/or breaking changes with future versions. + +## Usage +Install with `pip install tsalign`. + +The most important function is `tsalign.align(reference, query, **settings)`. On the object that is returned, you can e.g. call `.stats()` or `.cigar()`. diff --git a/python_bindings/pyproject.toml b/python_bindings/pyproject.toml new file mode 100644 index 00000000..4d18e360 --- /dev/null +++ b/python_bindings/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["maturin>=1.8,<2.0"] +build-backend = "maturin" + +[project] +name = "tsalign" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/python_bindings/src/lib.rs b/python_bindings/src/lib.rs new file mode 100644 index 00000000..fcbfe0b5 --- /dev/null +++ b/python_bindings/src/lib.rs @@ -0,0 +1,100 @@ +use std::io; + +use lib_tsalign::{ + a_star_aligner::{ + alignment_result::AlignmentResult, + configurable_a_star_align::{Config, a_star_align}, + template_switch_distance::AlignmentType, + }, + costs::U64Cost, +}; +use lib_tsshow::plain_text::show_template_switches; +use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyDict}; +use pythonize::{depythonize, pythonize}; + +#[pyclass] +struct TSPairwiseAlignment { + result: AlignmentResult, +} + +#[pymethods] +impl TSPairwiseAlignment { + fn viz_template_switches(&self) -> PyResult<()> { + show_template_switches(io::stdout(), &self.result, &None) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Ok(()) + } + + fn stats<'a>(&'a self, py: Python<'a>) -> PyResult> { + Ok(pythonize(py, self.result.statistics())?) + } + + fn cigar(&self) -> Option { + match &self.result { + AlignmentResult::WithTarget { alignment, .. } => Some(alignment.cigar()), + AlignmentResult::WithoutTarget { .. } => None, + } + } + + fn alignments<'a>(&'a self, py: Python<'a>) -> PyResult>> { + match &self.result { + AlignmentResult::WithTarget { alignment, .. } => { + let mut container = Vec::new(); + alignment.iter_compact().for_each(|e| container.push(e)); + Ok(Some(pythonize(py, &container)?)) + } + AlignmentResult::WithoutTarget { .. } => Ok(None), + } + } +} + +fn py_to_str(o: Bound<'_, PyAny>) -> PyResult> { + let str = o.str()?.to_str()?.as_bytes().to_vec(); + Ok(str) +} + +/// Creates a config object by amending the default by values present in the python dictionary +fn create_config(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult { + let Some(kwargs) = kwargs else { + return Ok(Config::default()); + }; + + let config = depythonize::(kwargs)?; + + Ok(config) +} + +/// Align two sequences, accounting for template switches +/// +/// The function takes a reference and a query string, and performs a global alignment on both. The output alignment may contain (short-range) template switches. +/// Optionally, settings can be specified. See the (configuration struct)[lib_tsalign::a_star_aligner::configurable_a_star_align::Config] for the available keys and values. +#[pyfunction] +#[pyo3(signature = (reference, query, **kwargs))] +fn align( + reference: Bound<'_, PyAny>, // Accepting PyAny instead of PyString to allow using e.g. `Bio.Seq` types and alike. String representation will be used. + query: Bound<'_, PyAny>, + kwargs: Option<&Bound<'_, PyDict>>, +) -> PyResult> { + let reference = py_to_str(reference)?; + let query = py_to_str(query)?; + let config = create_config(kwargs)?; + + let r = a_star_align(&reference, &query, &config); + + match r { + result @ AlignmentResult::WithTarget { .. } => { + let ts_alignment = TSPairwiseAlignment { result }; + Ok(Some(ts_alignment)) + } + AlignmentResult::WithoutTarget { .. } => Ok(None), + } +} + +/// Bindings for the `lib_tsalign` library. +#[pymodule] +fn tsalign(m: &Bound<'_, PyModule>) -> PyResult<()> { + pyo3_log::init(); + m.add_class::()?; + m.add_function(wrap_pyfunction!(align, m)?)?; + Ok(()) +}