diff --git a/Cargo.lock b/Cargo.lock
index 5e86c23..d3cb511 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -46,6 +46,16 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "aligned-vec"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
+dependencies = [
+ "equator",
+ "serde",
+]
+
[[package]]
name = "alloc-no-stdlib"
version = "2.0.4"
@@ -848,6 +858,15 @@ dependencies = [
"serde",
]
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
[[package]]
name = "bindgen"
version = "0.69.5"
@@ -2250,6 +2269,26 @@ dependencies = [
"log",
]
+[[package]]
+name = "equator"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
+dependencies = [
+ "equator-macro",
+]
+
+[[package]]
+name = "equator-macro"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "equivalent"
version = "1.0.1"
@@ -4422,6 +4461,7 @@ dependencies = [
"rust-lapper",
"strum",
"strum_macros",
+ "superintervals",
"tokio",
]
@@ -4666,6 +4706,19 @@ version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+[[package]]
+name = "superintervals"
+version = "0.3.6"
+dependencies = [
+ "aligned-vec",
+ "bincode",
+ "clap",
+ "fnv",
+ "libc",
+ "rand 0.8.5",
+ "serde",
+]
+
[[package]]
name = "syn"
version = "2.0.106"
diff --git a/flamegraph.svg b/flamegraph.svg
new file mode 100644
index 0000000..b295dd9
--- /dev/null
+++ b/flamegraph.svg
@@ -0,0 +1,491 @@
+
\ No newline at end of file
diff --git a/output.txt b/output.txt
new file mode 100644
index 0000000..7796154
--- /dev/null
+++ b/output.txt
@@ -0,0 +1 @@
+test Coitrees-Small-Medium-2-7-7-8 ... bench: 8488532322 ns/iter (+/- 417491090)
diff --git a/sequila/sequila-core/Cargo.toml b/sequila/sequila-core/Cargo.toml
index e1ceacc..e7e0a17 100644
--- a/sequila/sequila-core/Cargo.toml
+++ b/sequila/sequila-core/Cargo.toml
@@ -24,6 +24,7 @@ fnv = "1.0.7"
bio = "2.0.1"
rand = "0.8.5"
rust-lapper = "1.1.0"
+superintervals = { path = "superintervals" }
[dev-dependencies]
rstest = "0.22.0"
diff --git a/sequila/sequila-core/src/physical_planner/joins/interval_join.rs b/sequila/sequila-core/src/physical_planner/joins/interval_join.rs
index 46aa4c2..6438949 100644
--- a/sequila/sequila-core/src/physical_planner/joins/interval_join.rs
+++ b/sequila/sequila-core/src/physical_planner/joins/interval_join.rs
@@ -686,6 +686,7 @@ enum IntervalJoinAlgorithm {
IntervalTree(FnvHashMap>),
ArrayIntervalTree(FnvHashMap>),
Lapper(FnvHashMap>),
+ SuperIntervals(FnvHashMap>),
CoitreesNearest(
FnvHashMap<
u64,
@@ -719,6 +720,9 @@ impl Debug for IntervalJoinAlgorithm {
f.debug_struct("ArrayIntervalTree").field("0", m).finish()
}
IntervalJoinAlgorithm::Lapper(m) => f.debug_struct("Lapper").field("0", m).finish(),
+ IntervalJoinAlgorithm::SuperIntervals(m) => {
+ f.debug_struct("SuperIntervals").field("0", m).finish()
+ }
}
}
}
@@ -815,6 +819,20 @@ impl IntervalJoinAlgorithm {
IntervalJoinAlgorithm::Lapper(hashmap)
}
+ Algorithm::SuperIntervals => {
+ let hashmap = hash_map
+ .into_iter()
+ .map(|(k, v)| {
+ let mut map = superintervals::IntervalMap::new();
+ for s in v {
+ map.add(s.start, s.end, s.position);
+ }
+ map.build();
+ (k, map)
+ })
+ .collect::>>();
+ IntervalJoinAlgorithm::SuperIntervals(hashmap)
+ }
}
}
@@ -954,6 +972,13 @@ impl IntervalJoinAlgorithm {
}
}
}
+ IntervalJoinAlgorithm::SuperIntervals(hashmap) => {
+ if let Some(intervals) = hashmap.get(&k) {
+ for val in intervals.search_values_iter(start, end) {
+ f(val);
+ }
+ }
+ }
}
}
}
@@ -1344,6 +1369,7 @@ mod tests {
Some(Algorithm::Coitrees),
Some(Algorithm::IntervalTree),
Some(Algorithm::ArrayIntervalTree),
+ Some(Algorithm::SuperIntervals),
];
for alg in algs {
@@ -1439,6 +1465,7 @@ mod tests {
Some(Algorithm::IntervalTree),
Some(Algorithm::ArrayIntervalTree),
Some(Algorithm::Lapper),
+ Some(Algorithm::SuperIntervals),
];
let schema = &schema();
diff --git a/sequila/sequila-core/src/session_context.rs b/sequila/sequila-core/src/session_context.rs
index abcb24d..9dab7d1 100644
--- a/sequila/sequila-core/src/session_context.rs
+++ b/sequila/sequila-core/src/session_context.rs
@@ -65,6 +65,7 @@ pub enum Algorithm {
IntervalTree,
ArrayIntervalTree,
Lapper,
+ SuperIntervals,
CoitreesNearest,
CoitreesCountOverlaps,
}
@@ -90,6 +91,7 @@ impl FromStr for Algorithm {
"intervaltree" => Ok(Algorithm::IntervalTree),
"arrayintervaltree" => Ok(Algorithm::ArrayIntervalTree),
"lapper" => Ok(Algorithm::Lapper),
+ "superintervals" => Ok(Algorithm::SuperIntervals),
"coitreesnearest" => Ok(Algorithm::CoitreesNearest),
"coitreescountoverlaps" => Ok(Algorithm::CoitreesCountOverlaps),
_ => Err(ParseAlgorithmError(format!(
@@ -107,6 +109,7 @@ impl std::fmt::Display for Algorithm {
Algorithm::IntervalTree => "IntervalTree",
Algorithm::ArrayIntervalTree => "ArrayIntervalTree",
Algorithm::Lapper => "Lapper",
+ Algorithm::SuperIntervals => "SuperIntervals",
Algorithm::CoitreesNearest => "CoitreesNearest",
Algorithm::CoitreesCountOverlaps => "CoitreesCountOverlaps",
};
diff --git a/sequila/sequila-core/superintervals/.gitattributes b/sequila/sequila-core/superintervals/.gitattributes
new file mode 100644
index 0000000..f9198e0
--- /dev/null
+++ b/sequila/sequila-core/superintervals/.gitattributes
@@ -0,0 +1,10 @@
+*.py linguist-detectable=true
+*.rs linguist-detectable=true
+*.cpp linguist-detectable=true
+*.cc linguist-detectable=true
+*.h linguist-detectable=true
+*.hpp linguist-detectable=true
+
+test/* linguist-detectable=false
+examples/* linguist-detectable=false
+*.md linguist-detectable=false
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/.github/workflows/python-package.yml b/sequila/sequila-core/superintervals/.github/workflows/python-package.yml
new file mode 100644
index 0000000..364e5be
--- /dev/null
+++ b/sequila/sequila-core/superintervals/.github/workflows/python-package.yml
@@ -0,0 +1,28 @@
+name: Python package
+
+on:
+ push:
+ branches: [ "main" ]
+ pull_request:
+ branches: [ "main" ]
+
+jobs:
+ build:
+
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v3
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install .
+
diff --git a/sequila/sequila-core/superintervals/.gitignore b/sequila/sequila-core/superintervals/.gitignore
new file mode 100644
index 0000000..375c8ea
--- /dev/null
+++ b/sequila/sequila-core/superintervals/.gitignore
@@ -0,0 +1,45 @@
+/test/run-cpp-libs
+/test/run-cpp-libs.dSYM/
+/test/run_tests.dSYM/
+/test/run-tests
+/test/*.bed
+/build/
+/test/bench_data/
+/test/3rd-party/coitrees/target/
+/test/intervaldb.o
+/superintervals.egg-info/
+/superintervals/intervalset.cpp
+/superintervals/intervalset.*.so
+/test/bed-intersect
+/superintervals/superintervals.cpp
+
+
+# Added by cargo
+
+/target
+/test/bed-intersect-si
+/src/intervalset.cpython-311-x86_64-linux-gnu.so
+/src/superintervals/intervalset.cpp
+/src/superintervals.cpp
+/src/superintervals/intervalset.cpython-310-x86_64-linux-gnu.so
+/src/superintervals/intervalset.cpp
+/src/superintervals/intervalset.cpython-310-x86_64-linux-gnu.so
+/test/benchmark.png
+/test/cgranges.o
+/test/chr1.genome
+/old/
+/src/R/src/superintervals.so
+/src/R/src/RcppExports.o
+/src/R/src/intervalmap_r.o
+/test/bed-intersect-rl
+/src/superintervals/intervalmap.cpp
+/src/R/..Rcheck/
+/src/R/superintervals_0.99.0.tar.gz
+/src/R/superintervals.Rcheck/
+/src/superintervals.egg-info/
+/R/..Rcheck/
+/R/superintervals.Rcheck/
+/R/superintervals_0.99.0.tar.gz
+/test/3rd-party/rust-lapper_kc/target/
+/test/3rd-party/rust-lapper_kc/benches/
+/test/3rd-party/rust-lapper_kc/images/
diff --git a/sequila/sequila-core/superintervals/Cargo.toml b/sequila/sequila-core/superintervals/Cargo.toml
new file mode 100644
index 0000000..27ec561
--- /dev/null
+++ b/sequila/sequila-core/superintervals/Cargo.toml
@@ -0,0 +1,59 @@
+[package]
+name = "superintervals"
+description = "Interval overlap library"
+version = "0.3.6"
+authors = ["Kez Cleal. "]
+edition = "2021"
+repository = "https://github.com/kcleal/superintervals"
+homepage = "https://github.com/kcleal/superintervals"
+documentation = "https://github.com/kcleal/superintervals"
+readme = "README.md"
+license-file = "LICENSE"
+exclude = [
+ "test/*",
+ "src/superintervals/*",
+ "src/R/*",
+ "src/superintervals.egg*",
+ ".idea",
+ "dist",
+ "py*", "setup.py", "*.h", "*.hpp", "MANIFEST.in", ".gitignore"
+]
+
+[lib]
+path = "src/superintervals.rs"
+
+[dependencies]
+serde = { version = "1.0", features = ["derive"] }
+aligned-vec = { version = "0.6.4", features = ["serde"] }
+
+[profile.release]
+debug = 0
+strip = "symbols"
+lto = true
+opt-level = 3
+codegen-units = 1
+
+[features]
+nosimd = []
+
+[profile.dev.package."*"]
+opt-level = 3 # Compile dependencies with optimizations on even in debug mode.
+
+[profile.no-opt]
+inherits = "dev"
+opt-level = 0
+
+[profile.profiling]
+inherits = "release"
+debug = true
+strip = false
+
+[dev-dependencies]
+rand = "0.8"
+fnv = "1.0.7"
+libc = "0.2"
+clap = { version = "4.3.7", features = ["derive"] }
+bincode = "1.3.3"
+
+[[example]]
+name = "bed-intersect-si"
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/LICENSE b/sequila/sequila-core/superintervals/LICENSE
new file mode 100644
index 0000000..4dfb928
--- /dev/null
+++ b/sequila/sequila-core/superintervals/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Kez Cleal
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/README.md b/sequila/sequila-core/superintervals/README.md
new file mode 100644
index 0000000..b038bc3
--- /dev/null
+++ b/sequila/sequila-core/superintervals/README.md
@@ -0,0 +1,197 @@
+SuperIntervals
+==============
+
+A fast, memory-efficient data structure for interval intersection queries.
+SuperIntervals uses a novel superset-index approach that maintains
+intervals in position-sorted order, enabling cache-friendly searches and SIMD-optimized counting.
+
+Available for [C++](#cpp), [Rust](#rust), [Python](#python).
+The R package is hosted at https://github.com/kcleal/superintervalsr.
+
+### Features:
+
+- Linear-time index construction from sorted intervals
+- Cache-friendly querying
+- SIMD acceleration (AVX2/Neon) for counting operations
+- Small memory overhead (one size_t per interval)
+- Optional Eytzinger memory layout for slightly faster queries (C++ only)
+- No dependencies, header only
+
+### Notes:
+
+- Intervals are considered end-inclusive
+- The build() function must be called before any queries
+- Found intervals are returned in **reverse** position-sorted order
+
+## Python
+
+Install using `pip install superintervals`
+
+```python
+from superintervals import IntervalMap
+
+imap = IntervalMap()
+imap.add(10, 20, 'A')
+imap.build()
+results = imap.search_values(8, 20) # ['A']
+```
+
+Python API documentation can be found here:
+https://github.com/kcleal/superintervals/blob/main/src/superintervals/README.md
+
+
+## Cpp
+
+Header only implementation, copy to your include directory.
+
+```cpp
+#include "SuperIntervals.hpp"
+
+si::IntervalMap imap;
+imap.add(1, 5, "A");
+imap.build();
+std::vector results;
+imap.search_values(4, 9, results);
+```
+
+C++ API documentation can be found here:
+https://github.com/kcleal/superintervals/blob/main/src/README_cpp.md
+
+
+## Rust
+
+Add to your project using `cargo add superintervals`
+
+```rust
+use superintervals::IntervalMap;
+
+let mut imap = IntervalMap::new();
+imap.add(1, 5, "A");
+imap.build();
+let mut results = Vec::new();
+imap.search_values(4, 11, &mut results);
+```
+
+Rust API documentation can be found here:
+https://github.com/kcleal/superintervals/blob/main/src/README_rust.md
+
+
+
+## Test programs
+Test programs expect plain text BED files and only assess chr1 records - other chromosomes are ignored.
+
+C++ program compares SuperIntervals, ImplicitIntervalTree, IntervalTree and NCLS:
+```
+cd test; make
+./run-tests
+./run-cpp-libs a.bed b.bed
+```
+
+Rust program:
+```
+RUSTFLAGS="-Ctarget-cpu=native" cargo run --release --example bed-intersect-si
+cargo run --release --example bed-intersect-si a.bed b.bed
+```
+
+Python program:
+```
+python test/run-py-libs.py a.bed b.bed
+```
+
+R program:
+```
+Rscript src/R/benchmark.R
+```
+
+## Benchmark
+
+SuperIntervals (SI) was compared with:
+
+- Coitrees (Rust: https://github.com/dcjones/coitrees)
+- Implicit Interval Tree (C++: https://github.com/lh3/cgranges)
+- Interval Tree (C++: https://github.com/ekg/intervaltree)
+- Nested Containment List (C: https://github.com/pyranges/ncls/tree/master/ncls/src)
+
+Main results:
+
+- Finding interval intersections is on average ~1.5-3x faster than other libraries (Coitrees for Rust, Implicit Interval Tree for C++), with some
+exceptions. Coitrees-s was faster for one test (ONT reads, sorted DB53 reads).
+- The SIMD counting performance of coitrees and superintervals was similar.
+
+Datasets https://github.com/kcleal/superintervals/releases/download/v0.2.0/data.tar.gz:
+
+- `rna / anno` RNA-seq reads and annotations from cgranges repository
+- `ONT reads` nanopore alignments from sample PAO33946 chr1, converted to bed format
+- `DB53 reads` paired-end reads from sample DB53, NCBI BioProject PRJNA417592, chr1, converted to bed format
+- `mito-b, mito-a` paired-end reads from sample DB53 chrM, converted to bed format (mito-b and mito-a are the same)
+- `genes` UCSC genes from hg19
+
+Test programs use internal timers and print data to stdout, measuring the index time, and time to find all intersections. Other steps such as file IO are ignored. Test programs also only assess chr1 bed records - other chromosomes are ignored. For 'chrM' records, the M was replaced with 1 using sed. Data were assessed in position sorted and random order. Datasets can be found on the Releases page, and the test/run_tools.sh script has instructions for how to repeat the benchmark.
+
+Timings were in microseconds using an i9-11900K, 64 GB, 2TB NVMe machine.
+
+## Finding interval intersections
+
+- Coitrees-s uses the SortedQuerent version of coitrees
+- SI = superintervals. Eytz refers to the eytzinger layout. -rs is the Rust implementation.
+
+### Intervals in sorted order
+
+| | Coitrees | Coitrees-s | SuperIntervals-rs | SuperIntervalsEytz-rs | ImplicitITree-C++ | IntervalTree-C++ | NCLS-C | SuperIntervals-C++ | SuperIntervalsEytz-C++ |
+| --------------------- | -------- | ---------- |-------------------| --------------------- | ----------------- | ---------------- | ------ | ------------------ | ---------------------- |
+| DB53 reads, ONT reads | 1668 | 3179 | **757** | **757** | 3831 | 44404 | 10642 | **1315** | 1358 |
+| DB53 reads, genes | 55 | 84 | **21** | **21** | 122 | 109 | 291 | 42 | **40** |
+| ONT reads, DB53 reads | 6504 | **3354** | 3859 | 3854 | 17949 | 12280 | 30772 | 5290 | **4462** |
+| anno, rna | 50 | 35 | **18** | **18** | 127 | 90 | 208 | 29 | **22** |
+| genes, DB53 reads | 1171 | 1018 | 301 | **296** | 3129 | 1315 | 1780 | 442 | **323** |
+| mito-b, mito-a | 34769 | 34594 | 16971 | **16952** | 93900 | 107660 | 251707 | 33177 | **32985** |
+| rna, anno | 31 | 23 | 21 | **20** | 70 | 55 | 233 | 28 | **27** |
+
+### Intervals in random order
+
+| | Coitrees | Coitrees-s | SuperIntervals-rs | SuperIntervalsEytz-rs | ImplicitITree-C++ | IntervalTree-C++ | NCLS-C | SuperIntervals-C++ | SuperIntervalsEytz-C++ |
+| --------------------- | -------- | ---------- | ----------------- | --------------------- | ----------------- | ---------------- | ------ | ------------------ | ---------------------- |
+| DB53 reads, ONT reads | 2943 | 4663 | 1356 | **1355** | 6505 | 46743 | 11947 | 2491 | **2169** |
+| DB53 reads, genes | 78 | 130 | 27 | **26** | 170 | 125 | 305 | 58 | **51** |
+| ONT reads, DB53 reads | 16650 | 18931 | 16116 | **16037** | 38677 | 27832 | 53452 | **23003** | 23232 |
+| anno, rna | 89 | 105 | **54** | **54** | 188 | 143 | 294 | **58** | 60 |
+| genes, DB53 reads | 2222 | 2424 | 1693 | **1684** | 4490 | 2701 | 3605 | **1251** | 1749 |
+| mito-b, mito-a | 38030 | 86309 | **18326** | 18368 | 125336 | 118321 | 256293 | 42195 | **41695** |
+| rna, anno | 53 | 73 | **45** | **45** | 137 | 83 | 311 | **52** | **52** |
+
+## Counting interval intersections
+
+### Intervals in sorted order
+
+| | Coitrees | SuperIntervals-rs | SuperIntervalsEytz-rs | SuperIntervals-C++ | SuperIntervalsEytz-C++ |
+| --------------------- | -------- | ----------------- | --------------------- | ------------------ | ---------------------- |
+| DB53 reads, ONT reads | 551 | 370 | 371 | **241** | 263 |
+| DB53 reads, genes | 28 | 12 | 12 | 8 | **7** |
+| ONT reads, DB53 reads | 2478 | 1909 | 1890 | 2209 | **1312** |
+| anno, rna | 26 | 14 | 14 | 22 | **11** |
+| genes, DB53 reads | 747 | 321 | 336 | 446 | **290** |
+| mito-b, mito-a | 6894 | 6727 | 6746 | 3088 | **2966** |
+| rna, anno | **9** | 13 | 13 | 12 | 10 |
+
+### Intervals in random order
+
+| | Coitrees | SuperIntervals-rs | SuperIntervalsEytz-rs | SuperIntervals-C++ | SuperIntervalsEytz-C++ |
+| --------------------- | -------- | ----------------- | --------------------- | ------------------ | ---------------------- |
+| DB53 reads, ONT reads | 1988 | 972 | 969 | 1016 | **778** |
+| DB53 reads, genes | 53 | 20 | 20 | 16 | **13** |
+| ONT reads, DB53 reads | 6692 | 8864 | 8733 | **8182** | 9523 |
+| anno, rna | 52 | 49 | 48 | **47** | 50 |
+| genes, DB53 reads | 1503 | 1628 | 1592 | **1120** | 1623 |
+| mito-b, mito-a | 14354 | 7579 | 7600 | 4442 | **4383** |
+| rna, anno | 22 | 30 | 29 | **25** | **25** |
+
+
+## Acknowledgements
+
+- The rust test program borrows heavily from the coitrees package
+- The superset-index implemented here exploits a similar interval ordering as described in
+Schmidt 2009 "Interval Stabbing Problems in Small Integer Ranges". However, the superset-index has several advantages including
+ 1. An implicit memory layout
+ 1. General purpose implementation (not just small integer ranges)
+ 1. SIMD counting algorithm
+- The Eytzinger layout was adapted from Sergey Slotin, Algorithmica
diff --git a/sequila/sequila-core/superintervals/examples/bed-intersect-si.rs b/sequila/sequila-core/superintervals/examples/bed-intersect-si.rs
new file mode 100644
index 0000000..c3bffac
--- /dev/null
+++ b/sequila/sequila-core/superintervals/examples/bed-intersect-si.rs
@@ -0,0 +1,295 @@
+use std::error::Error;
+use std::fs::File;
+use std::io::{BufRead, BufReader, Write};
+use std::str;
+use std::time::Instant;
+extern crate fnv;
+use clap::Parser;
+use fnv::FnvHashMap;
+extern crate libc;
+
+use superintervals::IntervalMap;
+// use IntervalMap::IntervalMapEytz;
+
+use bincode;
+
+// Define a trait that all IntervalMap subclasses implement
+// pub trait IntervalCollection {
+// fn new() -> Self;
+// fn add(&mut self, start: i32, end: i32, value: T);
+// fn index(&mut self);
+// }
+//
+// // Implement the trait for IntervalMap
+// impl IntervalCollection for IntervalMap {
+// fn new() -> Self {
+// IntervalMap::new()
+// }
+// fn add(&mut self, start: i32, end: i32, value: T) {
+// self.add(start, end, value);
+// }
+// fn index(&mut self) {
+// self.index();
+// }
+// }
+
+// Implement the trait for IntervalMapEytz
+// impl IntervalCollection for IntervalMapEytz {
+// fn new() -> Self {
+// IntervalMapEytz::new()
+// }
+// fn add(&mut self, start: i32, end: i32, value: T) {
+// self.add(start, end, value);
+// }
+// fn index(&mut self) {
+// self.index();
+// }
+// }
+
+type GenericError = Box;
+
+// Parse a i32 with no checking whatsoever. (e.g. non-number characters will just)
+fn i32_from_bytes_uncheckd(s: &[u8]) -> i32 {
+ if s.is_empty() {
+ 0
+ } else if s[0] == b'-' {
+ -s[1..].iter().fold(0, |a, b| a * 10 + (b & 0x0f) as i32)
+ } else {
+ s.iter().fold(0, |a, b| a * 10 + (b & 0x0f) as i32)
+ }
+}
+
+fn parse_bed_line(line: &[u8]) -> (&str, i32, i32) {
+ let n = line.len() - 1;
+ let mut p = 0;
+ for c in &line[p..n] {
+ if *c == b'\t' {
+ break;
+ }
+ p += 1;
+ }
+ let seqname = unsafe { str::from_utf8_unchecked(&line[..p]) };
+ p += 1;
+ let p0 = p;
+
+ for c in &line[p..n] {
+ if *c == b'\t' {
+ break;
+ }
+ p += 1;
+ }
+ let first = i32_from_bytes_uncheckd(&line[p0..p]);
+ p += 1;
+ let p0 = p;
+
+ for c in &line[p..n] {
+ if *c == b'\t' {
+ break;
+ }
+ p += 1;
+ }
+ let last = i32_from_bytes_uncheckd(&line[p0..p]) - 1;
+
+ (seqname, first, last)
+}
+
+// Read a bed file into a IntervalMap
+// fn read_bed_file>(path: &str) -> Result, GenericError> {
+fn read_bed_file(path: &str) -> Result>, GenericError> {
+ let mut nodes = FnvHashMap::default();
+ let file = File::open(path)?;
+ let mut rdr = BufReader::new(file);
+ let mut line = Vec::new();
+ while rdr.read_until(b'\n', &mut line).unwrap() > 0 {
+ let (seqname, first, last) = parse_bed_line(&line);
+ if seqname != "chr1" || last < 0 || last < first {
+ line.clear();
+ continue;
+ }
+ // let intervals = nodes.entry(seqname.to_string()).or_insert_with(I::new);
+ let intervals = nodes
+ .entry(seqname.to_string())
+ .or_insert_with(IntervalMap::new);
+ intervals.add(first, last, ());
+ line.clear();
+ }
+ let now = Instant::now();
+ for intervals in nodes.values_mut() {
+ intervals.build();
+ }
+ eprint!("{},", now.elapsed().as_micros());
+ std::io::stderr().flush().unwrap();
+ Ok(nodes)
+}
+
+fn query_bed_files(filename_a: &str, filename_b: &str) -> Result<(), GenericError> {
+ let file = File::open(filename_b)?;
+ let mut rdr = BufReader::new(file);
+ let mut ranges: Vec<(i32, i32)> = Vec::new();
+ let mut line = Vec::new();
+ while rdr.read_until(b'\n', &mut line).unwrap() > 0 {
+ let (chrom, first, last) = parse_bed_line(&line);
+ if chrom != "chr1" || last < 0 || last < first {
+ line.clear();
+ continue;
+ }
+ ranges.push((first, last));
+ line.clear();
+ }
+ //
+ eprint!("SuperIntervals-rs,");
+ // let mut trees: FnvHashMap> = read_bed_file::>(filename_a)?;
+ let mut trees: FnvHashMap> = read_bed_file(filename_a)?;
+ let intervals: &mut IntervalMap<()> = trees
+ .get_mut("chr1")
+ .ok_or("Chromosome intervals not found")?;
+
+ // Verify the deserialized data
+ let serialized_size = bincode::serialized_size(&intervals).unwrap();
+ assert_ne!(serialized_size, 0);
+
+ // Find overlaps (collecting results)
+ let mut total_found = 0;
+ let mut results = Vec::new();
+ results.reserve(10000);
+ let mut now = Instant::now();
+ for &(first, last) in &ranges {
+ intervals.search_values(first, last, &mut results);
+ total_found += results.len();
+ results.clear();
+ // for _value in intervals.search_keys_iter(first, last) {
+ // total_found += 1;
+ // }
+ }
+ eprint!("{},{},", now.elapsed().as_micros(), total_found);
+ std::io::stderr().flush().unwrap();
+
+ // Find overlaps2
+ total_found = 0;
+ now = Instant::now();
+ for &(first, last) in &ranges {
+ intervals.search_values_large(first, last, &mut results);
+ total_found += results.len();
+ results.clear();
+ }
+ eprint!("{},{},", now.elapsed().as_micros(), total_found);
+ std::io::stderr().flush().unwrap();
+
+ // Count overlaps
+ let mut n_overlaps = 0;
+ now = Instant::now();
+ for &(first, last) in &ranges {
+ n_overlaps += intervals.count(first, last);
+ }
+ eprint!("{},{},", now.elapsed().as_micros(), n_overlaps);
+ std::io::stderr().flush().unwrap();
+
+ // Count overlaps exponential search
+ let mut n_overlaps = 0;
+ now = Instant::now();
+ for &(first, last) in &ranges {
+ n_overlaps += intervals.count_large(first, last);
+ }
+ eprint!("{},{}\n", now.elapsed().as_micros(), n_overlaps);
+ std::io::stderr().flush().unwrap();
+
+ //
+ // eprint!("IntervalMapEytz-rs,");
+ // let mut trees2: FnvHashMap> = read_bed_file::>(filename_a)?;
+ // let intervals2: &mut IntervalMapEytz<()> = trees2.get_mut("chr1").ok_or("Chromosome intervals not found")?;
+ //
+ // // Find overlaps (collecting results)
+ // total_found = 0;
+ // results.clear();
+ // results.reserve(10000);
+ // now = Instant::now();
+ // for &(first, last) in &ranges {
+ // intervals2.find_overlaps(first, last, &mut results);
+ // total_found += results.len();
+ // results.clear();
+ // }
+ // eprint!("{},{},", now.elapsed().as_micros(), total_found);
+ // std::io::stderr().flush().unwrap();
+ //
+ // // Count overlaps
+ // n_overlaps = 0;
+ // now = Instant::now();
+ // for &(first, last) in &ranges {
+ // n_overlaps += intervals2.count(first, last);
+ // }
+ // eprint!("{},{}\n", now.elapsed().as_micros(), n_overlaps);
+ // std::io::stderr().flush().unwrap();
+ //
+ Ok(())
+}
+
+#[derive(Parser, Debug)]
+#[command(about = " Find overlaps between two groups of intervals ")]
+struct Args {
+ /// intervals to index
+ #[arg(value_name = "intervals.bed")]
+ input1: String,
+ /// query intervals
+ #[arg(value_name = "queries.bed")]
+ input2: String,
+
+ /// compute proportion of queries covered
+ #[arg(short = 'c', long)]
+ coverage: bool,
+}
+
+fn query_bed_files_coverage(filename_a: &str, filename_b: &str) -> Result<(), GenericError> {
+ // let mut trees: FnvHashMap> = read_bed_file::>(filename_a)?;
+ let mut trees: FnvHashMap> = read_bed_file(filename_a)?;
+
+ let file = File::open(filename_b)?;
+ let mut rdr = BufReader::new(file);
+ let mut line = Vec::new();
+
+ let mut total_count: usize = 0;
+ let now = Instant::now();
+
+ while rdr.read_until(b'\n', &mut line).unwrap() > 0 {
+ let (seqname, first, last) = parse_bed_line(&line);
+ let mut count: usize = 0;
+ let mut cov: i32 = 0;
+ if let Some(seqname_tree) = trees.get_mut(seqname) {
+ let countcov = seqname_tree.coverage(first, last);
+ count = countcov.0;
+ cov = countcov.1;
+ }
+
+ unsafe {
+ let linelen = line.len();
+ line[linelen - 1] = b'\0';
+ libc::printf(
+ b"%s\t%u\t%u\n\0".as_ptr() as *const libc::c_char,
+ line.as_ptr() as *const libc::c_char,
+ count as u32,
+ cov,
+ );
+ }
+ total_count += count;
+ line.clear();
+ }
+
+ eprintln!("overlap: {}s", now.elapsed().as_millis() as f64 / 1000.0);
+ eprintln!("With coverage func total overlaps: {}", total_count);
+
+ Ok(())
+}
+
+fn main() {
+ let matches = Args::parse();
+ let input1 = matches.input1.as_str();
+ let input2 = matches.input2.as_str();
+ let result;
+ if matches.coverage {
+ result = query_bed_files_coverage(input1, input2);
+ } else {
+ result = query_bed_files(input1, input2);
+ }
+ if let Err(err) = result {
+ println!("error: {}", err)
+ }
+}
diff --git a/sequila/sequila-core/superintervals/pyproject.toml b/sequila/sequila-core/superintervals/pyproject.toml
new file mode 100644
index 0000000..1b82656
--- /dev/null
+++ b/sequila/sequila-core/superintervals/pyproject.toml
@@ -0,0 +1,14 @@
+[build-system]
+requires = [
+ "setuptools>=42",
+ "wheel",
+ "Cython"
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "superintervals"
+version = "0.3.6"
+description = "Rapid interval intersections"
+dependencies = ['Cython']
+authors = [{name = "Kez Cleal", email = "clealk@cardiff.ac.uk"}]
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/setup.py b/sequila/sequila-core/superintervals/setup.py
new file mode 100644
index 0000000..5d9fef8
--- /dev/null
+++ b/sequila/sequila-core/superintervals/setup.py
@@ -0,0 +1,22 @@
+from setuptools import setup, find_packages, Extension
+from Cython.Build import cythonize
+
+ext_modules = [
+ Extension("superintervals.intervalmap",
+ ["src/superintervals/intervalmap.pyx"],
+ include_dirs=["src"],
+ language="c++",
+ extra_compile_args=["-std=c++17"])
+]
+
+print('PAKCAGES', find_packages(where='src')) # Add this line for debugging
+
+setup(
+ name='superintervals',
+ description="Rapid interval intersections",
+ author="Kez Cleal",
+ author_email="clealk@cardiff.ac.uk",
+ packages=find_packages(where='src'),
+ package_dir={"": "src"},
+ ext_modules=cythonize(ext_modules),
+)
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/README_cpp.md b/sequila/sequila-core/superintervals/src/README_cpp.md
new file mode 100644
index 0000000..b10bcfc
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/README_cpp.md
@@ -0,0 +1,104 @@
+# Superintervals C++ API
+
+
+Header only implementation, copy to your include directory.
+
+```cpp
+#include "SuperIntervals.hpp"
+
+si::IntervalMap imap;
+imap.add(1, 5, "A");
+imap.build();
+
+// Collect results into a vector
+std::vector results;
+imap.search_values(4, 9, results);
+
+// Or use lazy iterator interfaces
+for (const auto &value : imap.search_values_iter(query_start, query_end)) {
+ std::cout << "Found: " << value << std::endl;
+}
+```
+
+### API Reference
+
+**IntervalMap** class (also **IntervalMapEytz** for Eytzinger layout):
+
+- `void add(S start, S end, const T& value)`
+ Add interval with associated value
+
+
+- `void build()`
+ Build index (required before queries)
+
+
+- `void clear()`
+ Remove all intervals
+
+
+- `void reserve(size_t n)`
+ Reserve space for n intervals
+
+
+- `size_t size()`
+ Get number of intervals
+
+
+- `const Interval& at(size_t index)`
+ Get Interval at index
+
+
+- `void at(size_t index, Interval& interval)`
+ Fill provided Interval object
+
+
+- `bool has_overlaps(S start, S end)`
+ Check if any intervals overlap range
+
+
+- `size_t count(S start, S end)`
+ Count overlapping intervals (SIMD optimized)
+
+
+- `size_t count_linear(S start, S end)`
+ Count overlapping intervals (linear)
+
+
+- `size_t count_large(S start, S end)`
+ Count optimized for large ranges
+
+
+- `void search_values(S start, S end, std::vector& found)`
+ Fill vector with values of overlapping intervals
+
+
+- `void search_values_large(S start, S end, std::vector& found)`
+ Search optimized for large ranges
+
+
+- `void search_idxs(S start, S end, std::vector& found)`
+ Fill vector with indices of overlapping intervals
+
+
+- `void search_keys(S start, S end, std::vector>& found)`
+ Fill vector with (start,end) pairs
+
+
+- `void search_items(S start, S end, std::vector>& found)`
+ Fill vector with Interval objects
+
+
+- `void search_point(S point, std::vector& found)`
+ Find intervals containing single point
+
+
+- `void coverage(S start, S end, std::pair& result)`
+ Get pair(count, total_coverage) for range
+
+
+- `IndexRange search_idxs(S start, S end)`
+ Returns IndexRange for range-based loops over indices
+
+
+- `ItemRange search_items(S start, S end)`
+ Returns ItemRange for range-based loops over intervals
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/README_rust.md b/sequila/sequila-core/superintervals/src/README_rust.md
new file mode 100644
index 0000000..ac32a56
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/README_rust.md
@@ -0,0 +1,109 @@
+# Superintervals rust API
+
+Add to your project using `cargo add superintervals`
+
+```rust
+use superintervals::IntervalMap;
+
+let mut imap = IntervalMap::new();
+imap.add(1, 5, "A");
+imap.build();
+
+// Collect results into a Vec
+let mut results = Vec::new();
+imap.search_values(4, 11, &mut results);
+
+// Or use lazy iterator interfaces
+for value in imap.search_values_iter(query_start, query_end) {
+ println!("Found: {}", value);
+}
+```
+
+### API Reference
+
+**IntervalMap** struct (also **IntervalMapEytz** for Eytzinger layout):
+
+- `fn new() -> Self`
+ Create new IntervalMap
+
+
+- `fn add(&mut self, start: i32, end: i32, value: T)`
+ Add interval with associated value
+
+
+- `fn build(&mut self)`
+ Build index (required before queries)
+
+
+- `fn clear(&mut self)`
+ Remove all intervals
+
+
+- `fn reserve(&mut self, n: usize)`
+ Reserve space for n intervals
+
+
+- `fn size(&self) -> usize`
+ Get number of intervals
+
+
+- `fn at(&self, index: usize) -> Interval`
+ Get Interval at index
+
+
+- `fn has_overlaps(&mut self, start: i32, end: i32) -> bool`
+ Check if any intervals overlap range
+
+
+- `fn count(&mut self, start: i32, end: i32) -> usize`
+ Count overlapping intervals (SIMD optimized)
+
+
+- `fn count_linear(&mut self, start: i32, end: i32) -> usize`
+ Count overlapping intervals (linear)
+
+
+- `fn count_large(&mut self, start: i32, end: i32) -> usize`
+ Count optimized for large ranges
+
+
+- `fn search_values(&mut self, start: i32, end: i32, found: &mut Vec)`
+ Fill vector with values of overlapping intervals
+
+
+- `fn search_values_large(&mut self, start: i32, end: i32, found: &mut Vec)`
+ Search optimized for large ranges
+
+
+- `fn search_idxs(&mut self, start: i32, end: i32, found: &mut Vec)`
+ Fill vector with indices of overlapping intervals
+
+
+- `fn search_keys(&mut self, start: i32, end: i32, found: &mut Vec<(i32, i32)>)`
+ Fill vector with (start,end) pairs
+
+
+- `fn search_items(&mut self, start: i32, end: i32, found: &mut Vec>)`
+ Fill vector with Interval objects
+
+
+- `fn search_stabbed(&mut self, point: i32, found: &mut Vec)`
+ Find intervals containing single point
+
+
+- `fn coverage(&mut self, start: i32, end: i32) -> (usize, i32)`
+ Get (count, total_coverage) for range
+
+
+- `fn search_idxs_iter(&mut self, start: i32, end: i32) -> IndexIterator`
+ Iterator over indices
+
+
+- `fn search_items_iter(&mut self, start: i32, end: i32) -> ItemIterator`
+ Iterator over intervals
+
+- `fn search_keys_iter(&mut self, start: i32, end: i32) -> KeyIterator`
+ Iterator over keys
+
+- `fn search_values_iter(&mut self, start: i32, end: i32) -> ValueIterator`
+ Iterator over values
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/c_superintervals.h b/sequila/sequila-core/superintervals/src/c_superintervals.h
new file mode 100644
index 0000000..2ce03bc
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/c_superintervals.h
@@ -0,0 +1,315 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef SUPERINTERVALS_HEADER_INCLUDED
+#define SUPERINTERVALS_HEADER_INCLUDED 1
+#endif
+#include
+#include
+#include
+#include
+
+#ifdef __AVX2__
+ #include
+#elif defined __ARM_NEON
+ #include
+#endif
+
+// Define the Interval struct
+typedef struct {
+ int32_t start;
+ int32_t end;
+ int32_t data;
+} Interval;
+
+// Define the SuperIntervals struct
+typedef struct {
+ int32_t* starts;
+ int32_t* ends;
+ int32_t* data;
+ size_t* branch;
+ int32_t* extent;
+ size_t size;
+ size_t capacity;
+ size_t idx;
+ bool startSorted;
+ bool endSorted;
+} cSuperIntervals;
+
+// Function prototypes
+cSuperIntervals* createSuperIntervals();
+void destroySuperIntervals(cSuperIntervals* si);
+void clearSuperIntervals(cSuperIntervals* si);
+void reserveSuperIntervals(cSuperIntervals* si, size_t n);
+void addInterval(cSuperIntervals* si, int32_t start, int32_t end, int32_t value);
+void sortIntervals(cSuperIntervals* si);
+void indexSuperIntervals(cSuperIntervals* si, bool use_linear);
+void upperBound(cSuperIntervals* si, int32_t value);
+bool anyOverlaps(cSuperIntervals* si, int32_t start, int32_t end);
+void findOverlaps(cSuperIntervals* si, int32_t start, int32_t end, int32_t* found, size_t* found_size);
+size_t countOverlaps(cSuperIntervals* si, int32_t start, int32_t end);
+
+// Helper function prototypes
+static void sortBlock(cSuperIntervals* si, size_t start_i, size_t end_i, int (*compare)(const void*, const void*));
+static size_t eytzinger_helper(int32_t* arr, size_t n, size_t i, size_t k, int32_t* eytz, size_t* eytz_index);
+static int eytzinger(int32_t* arr, size_t n, int32_t* eytz, size_t* eytz_index);
+
+cSuperIntervals* createSuperIntervals() {
+ cSuperIntervals* si = (cSuperIntervals*)malloc(sizeof(cSuperIntervals));
+ si->starts = NULL;
+ si->ends = NULL;
+ si->data = NULL;
+ si->branch = NULL;
+ si->extent = NULL;
+ si->size = 0;
+ si->capacity = 0;
+ si->idx = 0;
+ si->startSorted = true;
+ si->endSorted = true;
+ return si;
+}
+
+void destroySuperIntervals(cSuperIntervals* si) {
+ free(si->starts);
+ free(si->ends);
+ free(si->data);
+ free(si->branch);
+ free(si->extent);
+ free(si);
+}
+
+void clearSuperIntervals(cSuperIntervals* si) {
+ si->size = 0;
+ si->idx = 0;
+}
+
+void reserveSuperIntervals(cSuperIntervals* si, size_t n) {
+ if (n > si->capacity) {
+ si->capacity = n;
+ si->starts = (int32_t*)realloc(si->starts, n * sizeof(int32_t));
+ si->ends = (int32_t*)realloc(si->ends, n * sizeof(int32_t));
+ si->data = (int32_t*)realloc(si->data, n * sizeof(int32_t));
+ }
+}
+
+void addInterval(cSuperIntervals* si, int32_t start, int32_t end, int32_t value) {
+ if (si->size >= si->capacity) {
+ size_t new_capacity = si->capacity == 0 ? 1 : si->capacity * 2;
+ reserveSuperIntervals(si, new_capacity);
+ }
+
+ if (si->startSorted && si->size > 0) {
+ si->startSorted = (start < si->starts[si->size - 1]) ? false : true;
+ if (si->startSorted && start == si->starts[si->size - 1] && end > si->ends[si->size - 1]) {
+ si->endSorted = false;
+ }
+ }
+
+ si->starts[si->size] = start;
+ si->ends[si->size] = end;
+ si->data[si->size] = value;
+ si->size++;
+}
+
+static int compareIntervalsStart(const void* a, const void* b) {
+ const Interval* ia = (const Interval*)a;
+ const Interval* ib = (const Interval*)b;
+ if (ia->start != ib->start) {
+ return ia->start - ib->start;
+ }
+ return ib->end - ia->end; // Sort start, and end in descending order
+}
+
+static int compareIntervalsEnd(const void* a, const void* b) {
+ const Interval* ia = (const Interval*)a;
+ const Interval* ib = (const Interval*)b;
+ return ib->end - ia->end; // Sort by end in descending order
+}
+
+static void sortBlock(cSuperIntervals* si, size_t start_i, size_t end_i,
+ int (*compare)(const void*, const void*)) {
+ size_t range_size = end_i - start_i;
+ Interval* tmp = (Interval*)malloc(range_size * sizeof(Interval));
+
+ for (size_t i = 0; i < range_size; ++i) {
+ tmp[i].start = si->starts[start_i + i];
+ tmp[i].end = si->ends[start_i + i];
+ tmp[i].data = si->data[start_i + i];
+ }
+
+ qsort(tmp, range_size, sizeof(Interval), compare);
+
+ for (size_t i = 0; i < range_size; ++i) {
+ si->starts[start_i + i] = tmp[i].start;
+ si->ends[start_i + i] = tmp[i].end;
+ si->data[start_i + i] = tmp[i].data;
+ }
+
+ free(tmp);
+}
+
+void sortIntervals(cSuperIntervals* si) {
+ if (!si->startSorted) {
+ sortBlock(si, 0, si->size, compareIntervalsStart);
+ si->startSorted = true;
+ si->endSorted = true;
+ } else if (!si->endSorted) {
+ size_t it_start = 0;
+ while (it_start < si->size) {
+ size_t block_end = it_start + 1;
+ bool needs_sort = false;
+ while (block_end < si->size && si->starts[block_end] == si->starts[it_start]) {
+ if (block_end > it_start && si->ends[block_end] > si->ends[block_end - 1]) {
+ needs_sort = true;
+ }
+ ++block_end;
+ }
+ if (needs_sort) {
+ sortBlock(si, it_start, block_end, compareIntervalsEnd);
+ }
+ it_start = block_end;
+ }
+ si->endSorted = true;
+ }
+}
+
+static size_t eytzinger_helper(int32_t* arr, size_t n, size_t i, size_t k, int32_t* eytz, size_t* eytz_index) {
+ if (k < n) {
+ i = eytzinger_helper(arr, n, i, 2*k+1, eytz, eytz_index);
+ eytz[k] = arr[i];
+ eytz_index[k] = i;
+ ++i;
+ i = eytzinger_helper(arr, n, i, 2*k + 2, eytz, eytz_index);
+ }
+ return i;
+}
+
+static int eytzinger(int32_t* arr, size_t n, int32_t* eytz, size_t* eytz_index) {
+ return eytzinger_helper(arr, n, 0, 0, eytz, eytz_index);
+}
+
+void indexSuperIntervals(cSuperIntervals* si, bool use_linear) {
+ if (si->size == 0) {
+ return;
+ }
+
+ sortIntervals(si);
+
+ int32_t* eytz = (int32_t*)malloc((si->size + 1) * sizeof(int32_t));
+ size_t* eytz_index = (size_t*)malloc((si->size + 1) * sizeof(size_t));
+ eytzinger(si->starts, si->size, eytz, eytz_index);
+
+ si->branch = (size_t*)realloc(si->branch, si->size * sizeof(size_t));
+ memset(si->branch, -1, si->size * sizeof(size_t));
+
+ if (!use_linear) {
+ si->extent = (int32_t*)malloc(si->size * sizeof(int32_t));
+ memcpy(si->extent, si->ends, si->size * sizeof(int32_t));
+
+ for (size_t i = 0; i < si->size - 1; ++i) {
+ int32_t e = si->ends[i];
+ for (size_t j = i + 1; j < si->size; ++j) {
+ if (si->ends[j] >= si->ends[i]) {
+ break;
+ }
+ si->branch[j] = i;
+ if (e > si->extent[j]) {
+ si->extent[j] = e;
+ }
+ }
+ }
+ } else {
+ // Linear branch implementation
+ size_t* br = (size_t*)malloc(si->size * sizeof(size_t));
+ int32_t* br_ends = (int32_t*)malloc(si->size * sizeof(int32_t));
+ size_t br_size = 0;
+
+ br[br_size] = 0;
+ br_ends[br_size] = si->ends[0];
+ br_size++;
+
+ for (size_t i = 1; i < si->size; ++i) {
+ while (br_size > 0 && br_ends[br_size - 1] < si->ends[i]) {
+ br_size--;
+ }
+ if (br_size > 0) {
+ si->branch[i] = br[br_size - 1];
+ }
+ br[br_size] = i;
+ br_ends[br_size] = si->ends[i];
+ br_size++;
+ }
+
+ free(br);
+ free(br_ends);
+ }
+
+ free(eytz);
+ free(eytz_index);
+ si->idx = 0;
+}
+
+
+void upperBound(cSuperIntervals* si, int32_t value) {
+ size_t length = si->size - 1;
+ si->idx = 0;
+ const int entries_per_256KB = 256 * 1024 / sizeof(int32_t);
+ const int num_per_cache_line = 64 / sizeof(int32_t) > 1 ? 64 / sizeof(int32_t) : 1;
+
+ if (length >= entries_per_256KB) {
+ while (length >= 3 * num_per_cache_line) {
+ size_t half = length / 2;
+// __builtin_prefetch(&si->starts[si->idx + half / 2]);
+ size_t first_half1 = si->idx + (length - half);
+// __builtin_prefetch(&si->starts[first_half1 + half / 2]);
+ si->idx += (si->starts[si->idx + half] <= value) * (length - half);
+ length = half;
+ }
+ }
+
+ while (length > 0) {
+ size_t half = length / 2;
+ si->idx += (si->starts[si->idx + half] <= value) * (length - half);
+ length = half;
+ }
+
+ if (si->idx > 0 && (si->idx == si->size - 1 || si->starts[si->idx] > value)) {
+ --si->idx;
+ }
+}
+
+bool anyOverlaps(cSuperIntervals* si, int32_t start, int32_t end) {
+ upperBound(si, end);
+ return start <= si->ends[si->idx];
+}
+
+void findOverlaps(cSuperIntervals* si, int32_t start, int32_t end, int32_t* found, size_t* found_size) {
+ if (si->size == 0) {
+ *found_size = 0;
+ return;
+ }
+ upperBound(si, end);
+ size_t i = si->idx;
+ *found_size = 0;
+ while (i > 0) {
+ if (start <= si->ends[i]) {
+ found[(*found_size)++] = si->data[i];
+ --i;
+
+ } else {
+ if (si->branch[i] >= i) { // segfaults here
+ break;
+ }
+ i = si->branch[i];
+ }
+ }
+ if (i == 0 && start <= si->ends[0] && si->starts[0] <= end) {
+ found[(*found_size)++] = si->data[0];
+ }
+}
+
+
+#ifdef __cplusplus
+} // extern C
+#endif
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/lib.rs b/sequila/sequila-core/superintervals/src/lib.rs
new file mode 100644
index 0000000..5983007
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/lib.rs
@@ -0,0 +1,4 @@
+mod superintervals;
+
+// Public API - everything available at root level
+pub use superintervals::*;
diff --git a/sequila/sequila-core/superintervals/src/superintervals.hpp b/sequila/sequila-core/superintervals/src/superintervals.hpp
new file mode 100644
index 0000000..5203921
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals.hpp
@@ -0,0 +1,1009 @@
+// Version 0.3.6
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#ifndef SI_NOSIMD
+ #if defined(__AVX2__)
+ #include
+ #elif defined(__ARM_NEON__) || defined(__aarch64__)
+ #include
+ #else
+ #define SI_NOSIMD
+ #endif
+#endif
+
+namespace si {
+
+// Core data structure
+template
+struct Interval {
+ S start, end;
+ T data;
+ Interval() = default;
+ Interval(S s, S e, T d) : start(s), end(e), data(d) {}
+};
+
+/**
+ * @file SuperIntervals.hpp
+ * @brief IntervalMap is an associative data structure for finding interval intersections
+ *
+ * IntervalMap is a template class that provides efficient interval intersection operations.
+ * It supports adding intervals, indexing them for fast queries, and performing various
+ * intersection operations using an implicit superinterval tree algorithm.
+ *
+ * @note Intervals are considered end-inclusive
+ * @note The build() function must be called before any queries. If more intervals are added, call build() again.
+ *
+ * @tparam S The scalar type for interval start and end points (e.g., int, float)
+ * @tparam T The data type associated with each interval
+ */
+template
+
+class IntervalMap {
+ public:
+ std::vector starts;
+// std::vector ends;
+
+ #ifdef __AVX2__
+ alignas(32) std::vector ends;
+ #elif defined(__ARM_NEON)
+ alignas(16) std::vector ends;
+ #else
+ std::vector ends;
+ #endif
+ std::vector branch;
+ std::vector data;
+ bool start_sorted, end_sorted;
+
+ IntervalMap() : start_sorted(true), end_sorted(true) {}
+ virtual ~IntervalMap() = default;
+
+ /**
+ * @brief Clears all intervals and resets the data structure
+ */
+ void clear() noexcept {
+ data.clear(); starts.clear(); ends.clear(); branch.clear(); // idx = 0;
+ }
+
+ /**
+ * @brief Reserves memory for a specified number of intervals
+ * @param n Number of intervals to reserve space for
+ */
+ void reserve(size_t n) {
+ data.reserve(n); starts.reserve(n); ends.reserve(n);
+ }
+
+ /**
+ * @brief Returns the number of intervals in the data structure
+ * @return Number of intervals
+ */
+ size_t size() {
+ return starts.size();
+ }
+
+ /**
+ * @brief Adds a new interval to the data structure
+ * @param start Start point of the interval
+ * @param end End point of the interval
+ * @param value Data associated with the interval
+ */
+ void add(S start, S end, const T& value) {
+ if (start_sorted && !starts.empty()) {
+ start_sorted = (start < starts.back()) ? false : true;
+ if (start_sorted && start == starts.back() && end > ends.back()) {
+ end_sorted = false;
+ }
+ }
+ starts.push_back(start);
+ ends.push_back(end);
+ data.emplace_back(value);
+ }
+
+ /**
+ * @brief Build the index.
+ * This function must be called after adding intervals and before performing any queries.
+ * If more intervals are added after indexing, this function should be called again.
+ */
+ virtual void build() {
+ if (starts.size() == 0) {
+ return;
+ }
+ sort_intervals();
+ branch.resize(starts.size(), SIZE_MAX);
+ std::vector> br;
+ br.reserve((starts.size() / 10) + 1);
+ br.emplace_back() = {ends[0], 0};
+ for (size_t i=1; i < ends.size(); ++i) {
+ while (!br.empty() && br.back().first < ends[i]) {
+ br.pop_back();
+ }
+ if (!br.empty()) {
+ branch[i] = br.back().second;
+ }
+ br.emplace_back() = {ends[i], i};
+ }
+ }
+
+ /**
+ * @brief Retrieves an interval at a specific index
+ * @param index The index of the interval to retrieve
+ * @return The Interval at the specified index
+ */
+ const Interval& at(size_t index) const {
+ return Interval{starts[index], ends[index], data[index]};
+ }
+ /**
+ * @brief Retrieves an interval at a specific index and writes it into the provided Interval object.
+ * @param index The index of the interval to retrieve.
+ * @param itv Reference to an Interval object to populate with the retrieved interval.
+ */
+ void at(size_t index, Interval& itv) {
+ itv.start = starts[index];
+ itv.end = ends[index];
+ itv.data = data[index];
+ }
+
+ // Iterator interfaces
+ class IndexIterator {
+ public:
+ IndexIterator(const IntervalMap* parent, size_t pos, S query_start)
+ : parent_(parent), current_pos_(pos), query_start_(query_start),
+ has_value_(false), value_(0) {
+ next();
+ }
+
+ size_t operator*() const { return value_; }
+
+ IndexIterator& operator++() {
+ next();
+ return *this;
+ }
+
+ bool operator!=(const IndexIterator& other) const {
+ return has_value_ != other.has_value_;
+ }
+
+ bool operator==(const IndexIterator& other) const {
+ return has_value_ == other.has_value_;
+ }
+
+ private:
+ void next() const {
+ if (current_pos_ == SIZE_MAX) {
+ has_value_ = false;
+ return;
+ }
+ if (query_start_ <= parent_->ends[current_pos_]) {
+ value_ = parent_->data[current_pos_];
+ current_pos_ -= 1;
+ has_value_ = true;
+ return;
+ }
+ while (true) {
+ current_pos_ =parent_->branch[current_pos_];
+ if (current_pos_ == SIZE_MAX) {
+ break;
+ }
+ if (query_start_ <= parent_->ends[current_pos_]) {
+ value_ = parent_->data[current_pos_];
+ current_pos_ -= 1;
+ has_value_ = true;
+ return;
+ }
+ }
+ has_value_ = false;
+ return;
+ }
+
+ const IntervalMap* parent_;
+ mutable size_t current_pos_;
+ S query_start_;
+ mutable bool has_value_;
+ mutable size_t value_;
+ };
+
+ class IndexRange {
+ public:
+ IndexRange(const IntervalMap* parent, S start, S end)
+ : parent_(parent), start_(start), end_(end) {}
+ IndexIterator begin() const {
+ const size_t pos = parent_->starts.empty() ? SIZE_MAX : parent_->upper_bound(end_);
+ return IndexIterator(parent_, pos, start_);
+ }
+ IndexIterator end() const {
+ return IndexIterator(parent_, SIZE_MAX, start_);
+ }
+ private:
+ const IntervalMap* parent_;
+ S start_, end_;
+ };
+
+ /**
+ * @brief Creates a range object for iterating over interval indices that intersect [start, end]
+ * @param start Start point of the search range
+ * @param end End point of the search range
+ * @return IndexRange object that can be used with range-based for loops
+ */
+ IndexRange search_idxs(S start, S end) const noexcept {
+ return IndexRange(this, start, end);
+ }
+
+ class ItemIterator {
+ public:
+ ItemIterator(const IntervalMap* parent, size_t pos, S query_start)
+ : parent_(parent), current_pos_(pos), query_start_(query_start),
+ has_value_(false), value_{} {
+ next();
+ }
+
+ size_t operator*() const { return value_; }
+
+ ItemIterator& operator++() {
+ next();
+ return *this;
+ }
+
+ bool operator!=(const ItemIterator& other) const {
+ return has_value_ != other.has_value_;
+ }
+
+ bool operator==(const ItemIterator& other) const {
+ return has_value_ == other.has_value_;
+ }
+
+ private:
+ void next() const {
+ if (current_pos_ == SIZE_MAX) {
+ has_value_ = false;
+ return;
+ }
+ if (query_start_ <= parent_->ends[current_pos_]) {
+ value_.start = parent_->starts[current_pos_];
+ value_.end = parent_->ends[current_pos_];
+ value_.data = parent_->data[current_pos_];
+ current_pos_ -= 1;
+ has_value_ = true;
+ return;
+ }
+ while (true) {
+ current_pos_ =parent_->branch[current_pos_];
+ if (current_pos_ == SIZE_MAX) {
+ break;
+ }
+ if (query_start_ <= parent_->ends[current_pos_]) {
+ value_.start = parent_->starts[current_pos_];
+ value_.end = parent_->ends[current_pos_];
+ value_.data = parent_->data[current_pos_];
+ current_pos_ -= 1;
+ has_value_ = true;
+ return;
+ }
+ }
+ has_value_ = false;
+ return;
+ }
+
+ const IntervalMap* parent_;
+ mutable size_t current_pos_;
+ S query_start_;
+ mutable bool has_value_;
+ mutable Interval value_;
+ };
+
+ class ItemRange {
+ public:
+ ItemRange(const IntervalMap* parent, S start, S end)
+ : parent_(parent), start_(start), end_(end) {}
+ ItemIterator begin() const {
+ const size_t pos = parent_->starts.empty() ? SIZE_MAX : parent_->upper_bound(end_);
+ return ItemIterator(parent_, pos, start_);
+ }
+ ItemIterator end() const {
+ return ItemIterator(parent_, SIZE_MAX, start_);
+ }
+ private:
+ const IntervalMap* parent_;
+ S start_, end_;
+ };
+
+ /**
+ * @brief Creates a range object for iterating over interval items that intersect [start, end]
+ * @param start Start point of the search range
+ * @param end End point of the search range
+ * @return ItemRange object that can be used with range-based for loops
+ */
+ ItemRange search_items(S start, S end) const noexcept {
+ return ItemRange(this, start, end);
+ }
+
+ /**
+ * @brief Finds the largest index such that starts[index] ≤ value.
+ * @param value The upper bound value to search for.
+ * @note After calling this, idx will be set to that index (or SIZE_MAX if none).
+ */
+ virtual inline size_t upper_bound(const S value) const noexcept {
+ size_t length = starts.size();
+ size_t idx = 0;
+ while (length > 1) {
+ const size_t half = length / 2;
+ idx += (starts[idx + half] <= value) * (length - half);
+ length = half;
+ }
+ if (starts[idx] > value) {
+ --idx; // Might underflow to SIZE_MAX
+ }
+ return idx;
+
+// idx = std::distance(starts.begin(),
+// std::upper_bound(starts.begin(), starts.end(), value)) - 1;
+ }
+
+ /**
+ * @brief Narrows a [left…right) range so that starts[left] is the first element ≥ value.
+ * @param value The lower‐bound value to search for.
+ * @param left On entry, the lower end of the search range; on exit, the found index or SIZE_MAX.
+ * @param right The upper end of the search range (exclusive).
+ */
+ inline void upper_bound_range(const S value, size_t& left, const size_t right) const noexcept {
+ // First do exponential search if we have room
+ size_t search_right = right;
+ size_t bound = 1;
+ while (left > 0 && value < starts[left]) {
+ search_right = left;
+ left = (bound <= left) ? left - bound : 0;
+ bound *= 2;
+ }
+ // Now do binary search in the range
+ size_t length = search_right - left;
+ while (length > 1) {
+ const size_t half = length / 2;
+ left += (starts[left + half] < value) * (length - half);
+ length = half;
+ }
+ if (left == 0 && starts[left] >= value) {
+ left = SIZE_MAX;
+ }
+ }
+
+ /**
+ * @brief Collects all data values whose intervals intersect [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param found Output vector that will be filled with matching data values.
+ */
+ void search_values(const S start, const S end, std::vector& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return;
+ }
+ size_t i = idx;
+ while (i != SIZE_MAX && start <= ends[i]) {
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ found.insert(found.end(), data.rend() - idx - 1, data.rend());
+ return;
+ }
+ found.insert(found.end(), data.rend() - idx - 1, data.rend() - i - 1);
+ // This is slightly faster, but would lose the sort order:
+ // found.insert(found.end(), data.begin() + i, data.begin() + idx);
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ found.push_back(data[i]);
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Like search_intervals, but optimized for large query ranges.
+ * @note Uses exponential search - best when query range is large relative to stored intervals
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param found Output vector that will be filled with matching data values.
+ */
+ void search_values_large(const S start, const S end, std::vector& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return;
+ }
+ size_t i = idx;
+ upper_bound_range(start, i, idx);
+ while (i != SIZE_MAX && start <= ends[i]) {
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ found.insert(found.end(), data.rend() - idx - 1, data.rend());
+ return;
+ }
+ found.insert(found.end(), data.rend() - idx - 1, data.rend() - i - 1);
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ found.push_back(data[i]);
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Counts how many intervals intersect [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @return Number of intervals overlapping the query.
+ */
+ size_t count_linear(const S start, const S end) const noexcept {
+ if (starts.empty()) {
+ return 0;
+ }
+ size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return 0;
+ }
+ size_t starting_i = idx;
+ while (idx != SIZE_MAX && start <= ends[idx]) {
+ --idx;
+ }
+ size_t count = starting_i - idx;
+ if (idx == SIZE_MAX) {
+ return count;
+ }
+ idx = branch[idx];
+ while (idx != SIZE_MAX) {
+ if (start <= ends[idx]) {
+ count += 1;
+ --idx;
+ } else {
+ idx = branch[idx];
+ }
+ }
+ return count;
+ }
+
+ size_t count(const S start, const S end) const noexcept {
+ if (starts.empty()) {
+ return 0;
+ }
+ size_t i = upper_bound(end);
+ if (i == SIZE_MAX) {
+ return 0;
+ }
+ size_t found = 0;
+
+ #if defined(SI_NOSIMD)
+
+ constexpr size_t block = 16;
+ constexpr simd_kind active_simd = simd_kind::none;
+
+ #elif defined(__AVX2__)
+
+ __m256i start_vec = _mm256_set1_epi32(start);
+ constexpr size_t simd_width = 256 / (sizeof(S) * 8);
+ constexpr size_t block = simd_width * 4; // 2 cache lines
+ constexpr simd_kind active_simd = simd_kind::avx2;
+
+ #elif defined(__ARM_NEON__) || defined(__aarch64__)
+
+ int32x4_t start_vec = vdupq_n_s32(start);
+ constexpr size_t simd_width = 128 / (sizeof(S) * 8);
+ uint32x4_t ones = vdupq_n_u32(1);
+ constexpr size_t block = simd_width * 8; // 2 cache lines
+ constexpr simd_kind active_simd = simd_kind::neon;
+
+ #endif
+
+ while (i > 0) {
+ if (start <= ends[i]) {
+ ++found;
+ --i;
+ // Types with width !=4 will use the no-simd path here
+ if constexpr (active_simd == simd_kind::none || sizeof(S) != 4) {
+ while (i > block) {
+ size_t count = 0;
+ for (size_t j = i; j > i - block; --j) {
+ count += (start <= ends[j]) ? 1 : 0;
+ }
+ found += count;
+ i -= block;
+ if (count < block && start > ends[i + 1]) { // check for a branch
+ break;
+ }
+ }
+ }
+ #if defined(__AVX2__)
+ else if constexpr (active_simd == simd_kind::avx2) {
+// while (i > block) {
+// size_t count = 0;
+// for (size_t j = i - block + 1; j < i; j += simd_width) {
+// __m256i ends_vec = _mm256_loadu_si256((__m256i*)(&ends[j - simd_width + 1]));
+// __m256i cmp_mask = _mm256_cmpgt_epi32(start_vec, ends_vec);
+// int mask = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask));
+// count += 8 - _mm_popcnt_u32(mask);
+// }
+// found += count;
+// i -= block;
+// if (count < block) {
+// break;
+// }
+// }
+
+ while (i > block) {
+ size_t j = i - block + 1;
+
+ // Load all 4 vectors
+ __m256i ends_vec0 = _mm256_load_si256((__m256i*)(&ends[j]));
+ __m256i ends_vec1 = _mm256_load_si256((__m256i*)(&ends[j + simd_width]));
+ __m256i ends_vec2 = _mm256_load_si256((__m256i*)(&ends[j + 2 * simd_width]));
+ __m256i ends_vec3 = _mm256_load_si256((__m256i*)(&ends[j + 3 * simd_width]));
+
+ // Compare all vectors
+ __m256i cmp_mask0 = _mm256_cmpgt_epi32(start_vec, ends_vec0);
+ __m256i cmp_mask1 = _mm256_cmpgt_epi32(start_vec, ends_vec1);
+ __m256i cmp_mask2 = _mm256_cmpgt_epi32(start_vec, ends_vec2);
+ __m256i cmp_mask3 = _mm256_cmpgt_epi32(start_vec, ends_vec3);
+
+ // Extract masks
+ int mask0 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask0));
+ int mask1 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask1));
+ int mask2 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask2));
+ int mask3 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask3));
+
+ // Count and accumulate
+ size_t count = (8 - _mm_popcnt_u32(mask0)) + (8 - _mm_popcnt_u32(mask1)) +
+ (8 - _mm_popcnt_u32(mask2)) + (8 - _mm_popcnt_u32(mask3));
+
+ found += count;
+ i -= block;
+ if (count < block) {
+ break;
+ }
+ }
+ }
+ #elif defined(__ARM_NEON__) || defined(__aarch64__)
+ else { // NEON
+// while (i > block) {
+// size_t count = 0;
+// uint32x4_t mask, bool_mask;
+// for (size_t j = i - block + 1; j < i; j += simd_width) { // Neon 4 int32 at a time
+// int32x4_t ends_vec = vld1q_s32(&ends[j]);
+// mask = vcgtq_s32(start_vec, ends_vec); // start > ends[j]
+// bool_mask = vaddq_u32(mask, ones);
+// count += vaddvq_u32(bool_mask); // Sum all lanes
+// }
+// found += count;
+// i -= block;
+// if (count < block) { // check for overlap again, before checking for branch?
+// break;
+// }
+// }
+ while (i > block) {
+ size_t j = i - block + 1;
+
+ // Load all 8 vectors
+ int32x4_t ends_vec0 = vld1q_s32(&ends[j]);
+ int32x4_t ends_vec1 = vld1q_s32(&ends[j + simd_width]);
+ int32x4_t ends_vec2 = vld1q_s32(&ends[j + 2 * simd_width]);
+ int32x4_t ends_vec3 = vld1q_s32(&ends[j + 3 * simd_width]);
+ int32x4_t ends_vec4 = vld1q_s32(&ends[j + 4 * simd_width]);
+ int32x4_t ends_vec5 = vld1q_s32(&ends[j + 5 * simd_width]);
+ int32x4_t ends_vec6 = vld1q_s32(&ends[j + 6 * simd_width]);
+ int32x4_t ends_vec7 = vld1q_s32(&ends[j + 7 * simd_width]);
+
+ // Compare all vectors
+ uint32x4_t mask0 = vcgtq_s32(start_vec, ends_vec0);
+ uint32x4_t mask1 = vcgtq_s32(start_vec, ends_vec1);
+ uint32x4_t mask2 = vcgtq_s32(start_vec, ends_vec2);
+ uint32x4_t mask3 = vcgtq_s32(start_vec, ends_vec3);
+ uint32x4_t mask4 = vcgtq_s32(start_vec, ends_vec4);
+ uint32x4_t mask5 = vcgtq_s32(start_vec, ends_vec5);
+ uint32x4_t mask6 = vcgtq_s32(start_vec, ends_vec6);
+ uint32x4_t mask7 = vcgtq_s32(start_vec, ends_vec7);
+
+ // Convert to boolean masks
+ uint32x4_t bool_mask0 = vaddq_u32(mask0, ones);
+ uint32x4_t bool_mask1 = vaddq_u32(mask1, ones);
+ uint32x4_t bool_mask2 = vaddq_u32(mask2, ones);
+ uint32x4_t bool_mask3 = vaddq_u32(mask3, ones);
+ uint32x4_t bool_mask4 = vaddq_u32(mask4, ones);
+ uint32x4_t bool_mask5 = vaddq_u32(mask5, ones);
+ uint32x4_t bool_mask6 = vaddq_u32(mask6, ones);
+ uint32x4_t bool_mask7 = vaddq_u32(mask7, ones);
+
+ // Sum all lanes and accumulate
+ size_t count = vaddvq_u32(bool_mask0) + vaddvq_u32(bool_mask1) +
+ vaddvq_u32(bool_mask2) + vaddvq_u32(bool_mask3) +
+ vaddvq_u32(bool_mask4) + vaddvq_u32(bool_mask5) +
+ vaddvq_u32(bool_mask6) + vaddvq_u32(bool_mask7);
+
+ found += count;
+ i -= block;
+ if (count < block) {
+ break;
+ }
+ }
+ }
+ #endif
+ } else {
+ if (branch[i] == SIZE_MAX) {
+ return found;
+ }
+ i = branch[i];
+ }
+ }
+ if (i==0 && start <= ends[0] && starts[0] <= end) {
+ ++found;
+ }
+ return found;
+ }
+
+ /**
+ * @brief Like count, but optimized for large query ranges.
+ * @note Uses exponential search - best when query range is large relative to stored intervals
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @return Number of intervals overlapping the query.
+ */
+ size_t count_large(const S start, const S end) const noexcept {
+ if (starts.empty()) {
+ return 0;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return 0;
+ }
+ size_t i = idx;
+ upper_bound_range(start, i, idx);
+ while (i != SIZE_MAX && start <= ends[i]) {
+ --i;
+ }
+ size_t count = idx - i;
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ count += 1;
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ return count;
+ }
+
+ /**
+ * @brief Tests whether any interval overlaps the point range [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @return true if at least one overlap exists, false otherwise.
+ */
+ bool has_overlaps(const S start, const S end) const noexcept {
+ if (starts.empty()) {
+ return false;
+ }
+ const size_t idx = upper_bound(end);
+ return idx != SIZE_MAX && start <= ends[idx];
+ }
+
+ /**
+ * @brief Collects the indices of all intervals intersecting [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param found Output vector that will be filled with matching interval indices.
+ */
+ void search_idxs(const S start, const S end, std::vector& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return;
+ }
+ size_t i = idx;
+ while (i != SIZE_MAX && start <= ends[i]) {
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ found.insert(found.end(), CountingIterator(0), CountingIterator(idx + 1));
+ return;
+ }
+ found.insert(found.end(), CountingIterator(i + 1), CountingIterator(idx + 1));
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ found.push_back(i);
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Collects the (start,end) pairs of all intervals intersecting [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param found Output vector that will be filled with matching interval key pairs.
+ */
+ void search_keys(const S start, const S end, std::vector>& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return;
+ }
+ size_t i = idx;
+ while (i != SIZE_MAX && start <= ends[i]) {
+ found.emplace_back() = {starts[i], ends[i]};
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ return;
+ }
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ found.emplace_back() = {starts[i], ends[i]};
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Collects the full Interval objects intersecting [start,end].
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param found Output vector that will be filled with matching Interval instances.
+ */
+ void search_items(const S start, const S end, std::vector>& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ const size_t idx = upper_bound(end);
+ if (idx == SIZE_MAX) {
+ return;
+ }
+ size_t i = idx;
+ while (i != SIZE_MAX && start <= ends[i]) {
+ found.emplace_back() = {starts[i], ends[i], data[i]};
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ return;
+ }
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ found.emplace_back() = {starts[i], ends[i], data[i]};
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Computes how many intervals overlap [start,end] and the total covered length.
+ * @param start The start of the query interval.
+ * @param end The end of the query interval.
+ * @param cov_result Pair where first = count of overlaps, second = sum of overlapping lengths.
+ */
+ void coverage(const S start, const S end, std::pair &cov_result) const {
+ if (starts.empty()) {
+ return;
+ }
+ size_t i = upper_bound(end);
+ if (i == SIZE_MAX) {
+ return;
+ }
+// size_t i = idx;
+ while (i != SIZE_MAX && start <= ends[i]) {
+ ++cov_result.first;
+ cov_result.second += std::min(ends[i], end) - std::max(starts[i], start);
+ --i;
+ }
+ if (i == SIZE_MAX) {
+ return;
+ }
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (start <= ends[i]) {
+ ++cov_result.first;
+ cov_result.second += std::min(ends[i], end) - std::max(starts[i], start);
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ /**
+ * @brief Finds all data values of intervals “stabbed” by a single point.
+ * @param point The point to test.
+ * @param found Output vector that will be filled with data values of intervals containing point.
+ */
+ void search_point(const S point, std::vector& found) const {
+ if (starts.empty()) {
+ return;
+ }
+ size_t i = upper_bound(point);
+// size_t i = idx;
+ while (i != SIZE_MAX && point <= ends[i]) {
+ found.push_back(data[i]);
+ --i;
+ }
+ i = branch[i];
+ while (i != SIZE_MAX) {
+ if (point <= ends[i]) {
+ found.push_back(data[i]);
+ --i;
+ } else {
+ i = branch[i];
+ }
+ }
+ }
+
+ protected:
+
+ enum class simd_kind { none, avx2, neon };
+
+ std::vector> tmp;
+
+ template
+ void sort_block(size_t start_i, size_t end_i, CompareFunc compare) {
+ size_t range_size = end_i - start_i;
+ tmp.resize(range_size);
+ for (size_t i = 0; i < range_size; ++i) {
+ tmp[i] = Interval(starts[start_i + i], ends[start_i + i], data[start_i + i]);
+ }
+ std::sort(tmp.begin(), tmp.end(), compare);
+ for (size_t i = 0; i < range_size; ++i) {
+ starts[start_i + i] = tmp[i].start;
+ ends[start_i + i] = tmp[i].end;
+ data[start_i + i] = tmp[i].data;
+ }
+ }
+
+ /**
+ * @brief Ensures the global interval list is properly sorted by start (and end within ties).
+ * If only ends need resorting among equal starts, does a more targeted pass.
+ */
+ void sort_intervals() {
+ if (!start_sorted) {
+ sort_block(0, starts.size(),
+ [](const Interval& a, const Interval& b) { return (a.start < b.start || (a.start == b.start && a.end > b.end)); });
+ start_sorted = true;
+ end_sorted = true;
+ } else if (!end_sorted) { // only sort parts that need sorting - ends in descending order
+ size_t it_start = 0;
+ while (it_start < starts.size()) {
+ size_t block_end = it_start + 1;
+ bool needs_sort = false;
+ while (block_end < starts.size() && starts[block_end] == starts[it_start]) {
+ if (block_end > it_start && ends[block_end] > ends[block_end - 1]) {
+ needs_sort = true;
+ }
+ ++block_end;
+ }
+ if (needs_sort) {
+ sort_block(it_start, block_end, [](const Interval& a, const Interval& b) { return a.end > b.end; });
+ }
+ it_start = block_end;
+ }
+ end_sorted = true;
+ }
+ }
+
+ struct CountingIterator { // Just for using insert indexes
+ size_t value;
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = size_t;
+ using difference_type = std::ptrdiff_t;
+ using pointer = const size_t*;
+ using reference = size_t;
+
+ explicit CountingIterator(size_t v) : value(v) {}
+ size_t operator*() const { return value; }
+ CountingIterator& operator++() { ++value; return *this; }
+ bool operator!=(const CountingIterator& other) const { return value != other.value; }
+ };
+};
+
+
+template
+class IntervalMapEytz : public IntervalMap {
+public:
+ /**
+ * @brief Builds the Eytzinger‐layout index and branch pointers for fast searching.
+ * @note Overrides the base build() to use the cache‐friendly layout.
+ */
+ void build() override {
+ if (this->starts.size() == 0) {
+ return;
+ }
+ this->starts.shrink_to_fit();
+ this->ends.shrink_to_fit();
+ this->data.shrink_to_fit();
+ this->sort_intervals();
+
+ eytz.resize(this->starts.size() + 1);
+ eytz_index.resize(this->starts.size() + 1);
+ eytzinger(&this->starts[0], this->starts.size());
+
+ this->branch.resize(this->starts.size(), SIZE_MAX);
+ std::vector> br;
+ br.reserve(1000);
+ br.emplace_back() = {this->ends[0], 0};
+ for (size_t i=1; i < this->ends.size(); ++i) {
+ while (!br.empty() && br.back().first < this->ends[i]) {
+ br.pop_back();
+ }
+ if (!br.empty()) {
+ this->branch[i] = br.back().second;
+ }
+ br.emplace_back() = {this->ends[i], i};
+ }
+// this->idx = 0;
+ }
+
+ /**
+ * @brief Uses the Eytzinger‐layout tree to locate the largest start ≤ x.
+ * @param x The search value.
+ * @note Overrides the base upper_bound to navigate the implicit binary tree.
+ */
+ inline size_t upper_bound(const S x) const noexcept override {
+ size_t i = 0;
+ const size_t n_intervals = this->starts.size();
+ while (i < n_intervals) {
+ if (eytz[i] > x) {
+ i = 2 * i + 1;
+ } else {
+ i = 2 * i + 2;
+ }
+ }
+ int shift = __builtin_ffs(~(i + 1));
+ size_t best_idx = (i >> shift) - ((shift > 1) ? 1 : 0);
+ i = (best_idx < n_intervals) ? eytz_index[best_idx] : n_intervals - 1;
+ if (i > 0 && this->starts[i] > x) {
+ --i;
+ }
+ return i;
+ }
+
+private:
+ std::vector eytz;
+ std::vector eytz_index;
+
+ size_t eytzinger_helper(S* arr, size_t n, size_t i, size_t k) {
+ if (k < n) {
+ i = eytzinger_helper(arr, n, i, 2*k+1);
+ eytz[k] = this->starts[i];
+ eytz_index[k] = i;
+ ++i;
+ i = eytzinger_helper(arr, n, i, 2*k + 2);
+ }
+ return i;
+ }
+
+ int eytzinger(S* arr, size_t n) {
+ return eytzinger_helper(arr, n, 0, 0);
+ }
+};
+
+} // si
diff --git a/sequila/sequila-core/superintervals/src/superintervals.rs b/sequila/sequila-core/superintervals/src/superintervals.rs
new file mode 100644
index 0000000..bcbc9cf
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals.rs
@@ -0,0 +1,1063 @@
+//! This module provides an associative data structure for performing interval intersection queries.
+
+use aligned_vec::{AVec, ConstAlign};
+use serde::{Deserialize, Serialize};
+use std::cmp::Ordering;
+use std::cmp::{max, min};
+
+/// Represents an interval with associated data.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Interval {
+ pub start: i32,
+ pub end: i32,
+ pub data: T,
+}
+
+#[cfg(target_feature = "avx2")]
+type AlignedEnds = AVec>;
+
+#[cfg(all(target_feature = "neon", not(target_feature = "avx2")))]
+type AlignedEnds = AVec>;
+
+#[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
+type AlignedEnds = Vec;
+
+/// A static data structure for finding interval intersections
+///
+/// IntervalMap is a template class that provides efficient interval intersection operations.
+/// It supports adding intervals, indexing them for fast queries, and performing various
+/// intersection operations.
+///
+/// Intervals are considered end-inclusive
+/// The build() function must be called before any queries. If more intervals are added, call build() again.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IntervalMap {
+ pub starts: Vec,
+ // pub ends: Vec,
+ pub ends: AlignedEnds,
+ pub branch: Vec,
+ pub data: Vec,
+ pub start_sorted: bool,
+ pub end_sorted: bool,
+}
+
+impl IntervalMap {
+ pub fn new() -> Self {
+ IntervalMap {
+ starts: Vec::new(),
+ // ends: Vec::new(),
+ ends: {
+ #[cfg(target_feature = "avx2")]
+ {
+ AVec::new(32)
+ }
+
+ #[cfg(all(target_feature = "neon", not(target_feature = "avx2")))]
+ {
+ AVec::new(16)
+ }
+
+ #[cfg(not(any(target_feature = "avx2", target_feature = "neon")))]
+ {
+ Vec::new()
+ }
+ },
+
+ branch: Vec::new(),
+ data: Vec::new(),
+ start_sorted: true,
+ end_sorted: true,
+ }
+ }
+ /// Clears all intervals from the structure.
+ pub fn clear(&mut self) {
+ self.starts.clear();
+ self.ends.clear();
+ self.branch.clear();
+ self.data.clear();
+ }
+
+ pub fn reserve(&mut self, n: usize) {
+ self.starts.reserve(n);
+ self.ends.reserve(n);
+ self.data.reserve(n);
+ }
+
+ pub fn size(&self) -> usize {
+ self.starts.len()
+ }
+
+ /// Adds a new interval to the structure.
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start point of the interval.
+ /// * `end` - The end point of the interval.
+ /// * `value` - The data associated with the interval.
+ pub fn add(&mut self, start: i32, end: i32, value: T) {
+ if self.start_sorted && !self.starts.is_empty() {
+ self.start_sorted = start >= *self.starts.last().unwrap();
+ if self.start_sorted
+ && start == *self.starts.last().unwrap()
+ && end > *self.ends.last().unwrap()
+ {
+ self.end_sorted = false;
+ }
+ }
+ self.starts.push(start);
+ self.ends.push(end);
+ self.data.push(value);
+ }
+
+ pub fn at(&self, index: usize) -> Interval {
+ Interval {
+ start: self.starts[index],
+ end: self.ends[index],
+ data: self.data[index].clone(),
+ }
+ }
+
+ /// Sorts the intervals by start and -end.
+ pub fn sort_intervals(&mut self) {
+ if !self.start_sorted {
+ self.sort_block(0, self.starts.len(), |a, b| {
+ if a.start < b.start {
+ Ordering::Less
+ } else if a.start == b.start {
+ if a.end > b.end {
+ Ordering::Less
+ } else {
+ Ordering::Greater
+ }
+ } else {
+ Ordering::Greater
+ }
+ });
+ self.start_sorted = true;
+ self.end_sorted = true;
+ } else if !self.end_sorted {
+ let mut it_start = 0;
+ while it_start < self.starts.len() {
+ let mut block_end = it_start + 1;
+ let mut needs_sort = false;
+ while block_end < self.starts.len()
+ && self.starts[block_end] == self.starts[it_start]
+ {
+ if block_end > it_start && self.ends[block_end] > self.ends[block_end - 1] {
+ needs_sort = true;
+ }
+ block_end += 1;
+ }
+ if needs_sort {
+ self.sort_block(it_start, block_end, |a, b| a.end.cmp(&b.end).reverse());
+ }
+ it_start = block_end;
+ }
+ self.end_sorted = true;
+ }
+ }
+
+ /// Build the index. Must be called before queries are performed.
+ pub fn build(&mut self) {
+ if self.starts.is_empty() {
+ return;
+ }
+ self.sort_intervals();
+ self.branch.resize(self.starts.len(), usize::MAX);
+ let mut br: Vec<(i32, usize)> = Vec::with_capacity((self.starts.len() / 10) + 1);
+ unsafe {
+ br.push((*self.ends.get_unchecked(0), 0));
+ for i in 1..self.ends.len() {
+ while !br.is_empty() && br.last().unwrap().0 < *self.ends.get_unchecked(i) {
+ br.pop();
+ }
+ if !br.is_empty() {
+ *self.branch.get_unchecked_mut(i) = br.last().unwrap().1;
+ }
+ br.push((*self.ends.get_unchecked(i), i));
+ }
+ }
+ }
+
+ #[inline(always)]
+ pub fn upper_bound(&self, value: i32) -> usize {
+ let mut idx = 0;
+ let mut length = self.starts.len();
+ unsafe {
+ while length > 1 {
+ let half = length / 2;
+ idx += (*self.starts.get_unchecked(idx + half) <= value) as usize * (length - half);
+ length = half;
+ }
+ // idx = idx.wrapping_sub((*self.starts.get_unchecked(idx) > value) as usize);
+ if *self.starts.get_unchecked(idx) > value {
+ idx = idx.wrapping_sub(1);
+ }
+ }
+ return idx;
+ }
+
+ #[inline(always)]
+ pub fn upper_bound_range(&self, value: i32, right: usize) -> usize {
+ let mut left = right;
+ let mut search_right = right;
+ let mut bound = 1;
+ // Exponential search to find a smaller range
+ unsafe {
+ while left > 0 && value < *self.starts.get_unchecked(left) {
+ search_right = left;
+ left = if bound <= left { left - bound } else { 0 };
+ bound *= 2;
+ }
+ // Binary search in the found range
+ let mut length = search_right - left;
+ while length > 1 {
+ let half = length / 2;
+ let condition = *self.starts.get_unchecked(left + half) < value;
+ left += if condition { length - half } else { 0 };
+ length = half;
+ }
+ if left == 0 && *self.starts.get_unchecked(left) >= value {
+ left = usize::MAX;
+ }
+ }
+ left
+ }
+
+ pub fn has_overlaps(&self, start: i32, end: i32) -> bool {
+ if self.starts.is_empty() {
+ return false;
+ }
+ let idx = self.upper_bound(end);
+ unsafe { idx != usize::MAX && start <= *self.ends.get_unchecked(idx) }
+ }
+
+ // This is the simple algorithm, but suffers performance wise due to the branching
+ // pub fn find_overlaps(&mut self, start: i32, end: i32, found: &mut Vec) {
+ // if self.starts.is_empty() {
+ // return;
+ // }
+ // let mut i = self.upper_bound(end);
+ //
+ // unsafe {
+ // while i != usize::MAX {
+ // if start <= *self.ends.get_unchecked(i) {
+ // found.push(self.data.get_unchecked(i).clone());
+ // i = i.wrapping_sub(1);
+ // } else {
+ // i = *self.branch.get_unchecked(i);
+ // }
+ // }
+ // }
+ // }
+
+ /// Finds all intervals that overlap with the given range.
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ /// * `found` - A mutable vector to store the overlapping intervals' data.
+ pub fn search_values(&self, start: i32, end: i32, found: &mut Vec) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = idx;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ i = i.wrapping_sub(1);
+ }
+ if i == usize::MAX {
+ found.reserve(idx + 1);
+ found.extend((0..=idx).rev().map(|j| self.data.get_unchecked(j).clone()));
+ return;
+ }
+ let count = idx - i;
+ found.reserve(count);
+ found.extend(
+ ((i + 1)..=idx)
+ .rev()
+ .map(|j| self.data.get_unchecked(j).clone()),
+ );
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ found.push(self.data.get_unchecked(i).clone());
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+
+ /// Finds all intervals that overlap with the given range. Works best when
+ /// query intervals are large compared to stored database intervals. Uses
+ /// an exponential search to find overlaps
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ /// * `found` - A mutable vector to store the overlapping intervals' data.
+ pub fn search_values_large(&self, start: i32, end: i32, found: &mut Vec) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = self.upper_bound_range(start, idx);
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ i = i.wrapping_sub(1);
+ }
+ if i == usize::MAX {
+ found.reserve(idx + 1);
+ found.extend((0..=idx).rev().map(|j| self.data.get_unchecked(j).clone()));
+ return;
+ }
+ found.reserve(idx - i);
+ found.extend(
+ ((i + 1)..=idx)
+ .rev()
+ .map(|j| self.data.get_unchecked(j).clone()),
+ );
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ found.push(self.data.get_unchecked(i).clone());
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+
+ /// Counts all intervals that overlap with the given range.
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ ///
+ /// # Returns
+ ///
+ /// The number of overlapping intervals.
+ pub fn count_linear(&self, start: i32, end: i32) -> usize {
+ if self.starts.is_empty() {
+ return 0;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return 0;
+ }
+ let mut i = idx;
+ let mut count: usize = 0;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ i = i.wrapping_sub(1);
+ }
+ if i == usize::MAX {
+ return idx + 1;
+ }
+ count += idx - i;
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ count += 1;
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ count
+ }
+
+ /// Counts the number of intervals that overlap with the given range.
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ ///
+ /// # Returns
+ ///
+ /// The number of overlapping intervals.
+ /// Counts the number of intervals that overlap with the given range.
+ pub fn count(&self, start: i32, end: i32) -> usize {
+ if self.starts.is_empty() {
+ return 0;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return 0;
+ }
+ let mut i = idx;
+ let mut found = 0;
+
+ unsafe {
+ #[cfg(target_arch = "x86_64")]
+ {
+ use std::arch::x86_64::*;
+ let start_vec = _mm256_set1_epi32(start);
+ const SIMD_WIDTH: usize = 8; // 256 bits / 32 bits = 8 elements
+ const BLOCK: usize = SIMD_WIDTH * 4; // 32 elements per block, 2 cache lines
+
+ while i > 0 {
+ if start <= *self.ends.get_unchecked(i) {
+ found += 1;
+ i -= 1;
+
+ // Process blocks of data with SIMD
+ // while i > BLOCK {
+ // let mut count = 0;
+ // let mut j = i;
+ // // Process SIMD_WIDTH elements at a time
+ // while j > i - BLOCK {
+ // let end_idx = if j >= SIMD_WIDTH { j - SIMD_WIDTH + 1 } else { 0 };
+ // let ends_vec = _mm256_loadu_si256(self.ends.as_ptr().add(end_idx) as *const __m256i);
+ // let cmp_mask = _mm256_cmpgt_epi32(start_vec, ends_vec);
+ // let mask = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask));
+ // count += 8 - (mask).count_ones() as usize;
+ // j = j.saturating_sub(SIMD_WIDTH);
+ // }
+ //
+ // found += count;
+ // i -= BLOCK;
+ //
+ // // Early exit if we didn't find full block worth of matches
+ // if count < BLOCK && start > *self.ends.get_unchecked(i + 1) {
+ // break;
+ // }
+ // }
+ while i > BLOCK {
+ let j = i - BLOCK + 1;
+
+ // Load all 4 vectors
+ let ends_vec0 =
+ _mm256_loadu_si256(self.ends.as_ptr().add(j) as *const __m256i);
+ let ends_vec1 = _mm256_loadu_si256(
+ self.ends.as_ptr().add(j + SIMD_WIDTH) as *const __m256i,
+ );
+ let ends_vec2 = _mm256_loadu_si256(
+ self.ends.as_ptr().add(j + 2 * SIMD_WIDTH) as *const __m256i,
+ );
+ let ends_vec3 = _mm256_loadu_si256(
+ self.ends.as_ptr().add(j + 3 * SIMD_WIDTH) as *const __m256i,
+ );
+
+ // Compare all vectors
+ let cmp_mask0 = _mm256_cmpgt_epi32(start_vec, ends_vec0);
+ let cmp_mask1 = _mm256_cmpgt_epi32(start_vec, ends_vec1);
+ let cmp_mask2 = _mm256_cmpgt_epi32(start_vec, ends_vec2);
+ let cmp_mask3 = _mm256_cmpgt_epi32(start_vec, ends_vec3);
+
+ // Extract masks
+ let mask0 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask0));
+ let mask1 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask1));
+ let mask2 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask2));
+ let mask3 = _mm256_movemask_ps(_mm256_castsi256_ps(cmp_mask3));
+
+ // Count and accumulate
+ let count = (8 - (mask0).count_ones() as usize)
+ + (8 - (mask1).count_ones() as usize)
+ + (8 - (mask2).count_ones() as usize)
+ + (8 - (mask3).count_ones() as usize);
+
+ found += count;
+ i -= BLOCK;
+
+ // Early exit if we didn't find full block worth of matches
+ if count < BLOCK && start > *self.ends.get_unchecked(i + 1) {
+ break;
+ }
+ }
+ } else {
+ if *self.branch.get_unchecked(i) == usize::MAX {
+ return found;
+ }
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+
+ #[cfg(target_arch = "aarch64")]
+ {
+ use std::arch::aarch64::*;
+ let start_vec = vdupq_n_s32(start);
+ const SIMD_WIDTH: usize = 4; // 128 bits / 32 bits = 4 elements
+ const BLOCK: usize = SIMD_WIDTH * 8; // 2 cache lines
+ let ones = vdupq_n_u32(1);
+ while i > 0 {
+ if start <= *self.ends.get_unchecked(i) {
+ found += 1;
+ i -= 1;
+ // while i > BLOCK {
+ // let mut count = 0;
+ // let mut j = i;
+ // while j > i - BLOCK {
+ // let end_idx = if j >= SIMD_WIDTH { j - SIMD_WIDTH + 1 } else { 0 };
+ // let ends_vec = vld1q_s32(self.ends.as_ptr().add(end_idx) as *const i32);
+ // let mask = vcgtq_s32(start_vec, ends_vec);
+ // let bool_mask = vaddq_u32(mask, ones);
+ // count += vaddvq_u32(bool_mask) as usize;
+ // j = j.saturating_sub(SIMD_WIDTH);
+ // }
+ // found += count;
+ // i -= BLOCK;
+ // if count < BLOCK {
+ // break;
+ // }
+ // }
+
+ while i > BLOCK {
+ let j = i - BLOCK + 1;
+
+ // Load all 8 vectors
+ let ends_vec0 = vld1q_s32(self.ends.as_ptr().add(j) as *const i32);
+ let ends_vec1 =
+ vld1q_s32(self.ends.as_ptr().add(j + SIMD_WIDTH) as *const i32);
+ let ends_vec2 =
+ vld1q_s32(self.ends.as_ptr().add(j + 2 * SIMD_WIDTH) as *const i32);
+ let ends_vec3 =
+ vld1q_s32(self.ends.as_ptr().add(j + 3 * SIMD_WIDTH) as *const i32);
+ let ends_vec4 =
+ vld1q_s32(self.ends.as_ptr().add(j + 4 * SIMD_WIDTH) as *const i32);
+ let ends_vec5 =
+ vld1q_s32(self.ends.as_ptr().add(j + 5 * SIMD_WIDTH) as *const i32);
+ let ends_vec6 =
+ vld1q_s32(self.ends.as_ptr().add(j + 6 * SIMD_WIDTH) as *const i32);
+ let ends_vec7 =
+ vld1q_s32(self.ends.as_ptr().add(j + 7 * SIMD_WIDTH) as *const i32);
+
+ // Compare all vectors
+ let mask0 = vcgtq_s32(start_vec, ends_vec0);
+ let mask1 = vcgtq_s32(start_vec, ends_vec1);
+ let mask2 = vcgtq_s32(start_vec, ends_vec2);
+ let mask3 = vcgtq_s32(start_vec, ends_vec3);
+ let mask4 = vcgtq_s32(start_vec, ends_vec4);
+ let mask5 = vcgtq_s32(start_vec, ends_vec5);
+ let mask6 = vcgtq_s32(start_vec, ends_vec6);
+ let mask7 = vcgtq_s32(start_vec, ends_vec7);
+
+ // Convert to boolean masks
+ let bool_mask0 = vaddq_u32(mask0, ones);
+ let bool_mask1 = vaddq_u32(mask1, ones);
+ let bool_mask2 = vaddq_u32(mask2, ones);
+ let bool_mask3 = vaddq_u32(mask3, ones);
+ let bool_mask4 = vaddq_u32(mask4, ones);
+ let bool_mask5 = vaddq_u32(mask5, ones);
+ let bool_mask6 = vaddq_u32(mask6, ones);
+ let bool_mask7 = vaddq_u32(mask7, ones);
+
+ // Sum all lanes and accumulate
+ let count = vaddvq_u32(bool_mask0) as usize
+ + vaddvq_u32(bool_mask1) as usize
+ + vaddvq_u32(bool_mask2) as usize
+ + vaddvq_u32(bool_mask3) as usize
+ + vaddvq_u32(bool_mask4) as usize
+ + vaddvq_u32(bool_mask5) as usize
+ + vaddvq_u32(bool_mask6) as usize
+ + vaddvq_u32(bool_mask7) as usize;
+
+ found += count;
+ i -= BLOCK;
+ if count < BLOCK {
+ break;
+ }
+ }
+ } else {
+ if *self.branch.get_unchecked(i) == usize::MAX {
+ return found;
+ }
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+
+ #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
+ {
+ const BLOCK: usize = 16;
+
+ while i > 0 {
+ if start <= *self.ends.get_unchecked(i) {
+ found += 1;
+ i -= 1;
+ while i > BLOCK {
+ let mut count = 0;
+ for j in (i - BLOCK + 1..=i).rev() {
+ if start <= *self.ends.get_unchecked(j) {
+ count += 1;
+ }
+ }
+ found += count;
+ i -= BLOCK;
+ if count < BLOCK && start > *self.ends.get_unchecked(i + 1) {
+ break;
+ }
+ }
+ } else {
+ if *self.branch.get_unchecked(i) == usize::MAX {
+ return found;
+ }
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ // Final check for element at index 0
+ if i == 0
+ && start <= *self.ends.get_unchecked(0)
+ && *self.starts.get_unchecked(0) <= end
+ {
+ found += 1;
+ }
+ }
+
+ found
+ }
+
+ /// Counts all intervals that overlap with the given range. Works best when
+ /// query intervals are large compared to stored database intervals. Uses
+ /// an exponential search to find overlaps
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ ///
+ /// # Returns
+ ///
+ /// The number of overlapping intervals.
+ pub fn count_large(&self, start: i32, end: i32) -> usize {
+ if self.starts.is_empty() {
+ return 0;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return 0;
+ }
+ let mut i = self.upper_bound_range(start, idx);
+ let mut count: usize = 0;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ i = i.wrapping_sub(1);
+ }
+ if i == usize::MAX {
+ return idx + 1;
+ }
+ count += idx - i;
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ count += 1;
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ count
+ }
+
+ pub fn search_idxs(&self, start: i32, end: i32, found: &mut Vec) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = idx;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ i = i.wrapping_sub(1);
+ }
+ if i == usize::MAX {
+ found.extend((0..=idx).rev());
+ return;
+ }
+ found.extend(((i + 1)..=idx).rev());
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ found.push(i);
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+
+ pub fn search_keys(&self, start: i32, end: i32, found: &mut Vec<(i32, i32)>) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = idx;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ found.push((*self.starts.get_unchecked(i), *self.ends.get_unchecked(i)));
+ i = i.wrapping_sub(1);
+ }
+ if i != usize::MAX {
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ found.push((*self.starts.get_unchecked(i), *self.ends.get_unchecked(i)));
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+ }
+
+ pub fn search_items(&self, start: i32, end: i32, found: &mut Vec>) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(end);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = idx;
+ unsafe {
+ while i != usize::MAX && start <= *self.ends.get_unchecked(i) {
+ found.push(Interval {
+ start: *self.starts.get_unchecked(i),
+ end: *self.ends.get_unchecked(i),
+ data: self.data.get_unchecked(i).clone(),
+ });
+ i = i.wrapping_sub(1);
+ }
+ if i != usize::MAX {
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ found.push(Interval {
+ start: *self.starts.get_unchecked(i),
+ end: *self.ends.get_unchecked(i),
+ data: self.data.get_unchecked(i).clone(),
+ });
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+ }
+
+ pub fn search_stabbed(&self, point: i32, found: &mut Vec) {
+ if self.starts.is_empty() {
+ return;
+ }
+ let idx = self.upper_bound(point);
+ if idx == usize::MAX {
+ return;
+ }
+ let mut i = idx;
+ unsafe {
+ while i != usize::MAX && point <= *self.ends.get_unchecked(i) {
+ found.push(self.data.get_unchecked(i).clone());
+ i = i.wrapping_sub(1);
+ }
+ if i != usize::MAX {
+ i = *self.branch.get_unchecked(i);
+ while i != usize::MAX {
+ if point <= *self.ends.get_unchecked(i) {
+ found.push(self.data.get_unchecked(i).clone());
+ i = i.wrapping_sub(1);
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ }
+ }
+
+ /// Counts the total coverage over the query interval.
+ ///
+ /// # Arguments
+ ///
+ /// * `start` - The start of the range to check for overlaps.
+ /// * `end` - The end of the range to check for overlaps.
+ ///
+ /// # Returns
+ ///
+ /// The number of overlapping intervals, plus the coverage.
+ pub fn coverage(&self, start: i32, end: i32) -> (usize, i32) {
+ if self.starts.is_empty() {
+ return (0, 0);
+ }
+ let mut i = self.upper_bound(end);
+ let mut count: usize = 0;
+ let mut coverage: i32 = 0;
+ unsafe {
+ while i != usize::MAX {
+ if start <= *self.ends.get_unchecked(i) {
+ count += 1;
+ coverage += min(*self.ends.get_unchecked(i), end)
+ - max(*self.starts.get_unchecked(i), start);
+ i -= 1;
+ } else {
+ i = *self.branch.get_unchecked(i);
+ }
+ }
+ }
+ (count, coverage)
+ }
+
+ fn sort_block(&mut self, start_i: usize, end_i: usize, compare: F)
+ where
+ F: Fn(&Interval, &Interval) -> Ordering,
+ {
+ unsafe {
+ let range_size = end_i - start_i;
+ let mut tmp: Vec> = Vec::with_capacity(range_size);
+ for i in 0..range_size {
+ tmp.push(Interval {
+ start: *self.starts.get_unchecked(start_i + i),
+ end: *self.ends.get_unchecked(start_i + i),
+ data: (*self.data.get_unchecked(start_i + i)).clone(),
+ });
+ }
+ tmp.sort_by(compare);
+ for i in 0..range_size {
+ self.starts[start_i + i] = tmp.get_unchecked(i).start;
+ self.ends[start_i + i] = tmp.get_unchecked(i).end;
+ self.data[start_i + i] = tmp.get_unchecked(i).data.clone();
+ }
+ }
+ }
+}
+
+// Iterator interfaces
+
+pub struct IndexIterator<'a, T> {
+ tree: &'a IntervalMap,
+ current_idx: usize,
+ query_start: i32,
+}
+
+impl<'a, T: Clone> Iterator for IndexIterator<'a, T> {
+ type Item = usize;
+
+ fn next(&mut self) -> Option {
+ if self.current_idx == usize::MAX {
+ return None;
+ }
+ unsafe {
+ // Linear scan backwards
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = self.current_idx;
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ // Traverse branch array
+ loop {
+ self.current_idx = *self.tree.branch.get_unchecked(self.current_idx);
+ if self.current_idx == usize::MAX {
+ break;
+ }
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = self.current_idx;
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ }
+ }
+ None
+ }
+}
+
+pub struct ItemIterator<'a, T> {
+ tree: &'a IntervalMap,
+ current_idx: usize,
+ query_start: i32,
+}
+
+impl<'a, T: Clone> Iterator for ItemIterator<'a, T> {
+ type Item = Interval;
+
+ fn next(&mut self) -> Option {
+ if self.current_idx == usize::MAX {
+ return None;
+ }
+ unsafe {
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = Interval {
+ start: *self.tree.starts.get_unchecked(self.current_idx),
+ end: *self.tree.ends.get_unchecked(self.current_idx),
+ data: self.tree.data.get_unchecked(self.current_idx).clone(),
+ };
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ loop {
+ self.current_idx = *self.tree.branch.get_unchecked(self.current_idx);
+ if self.current_idx == usize::MAX {
+ break;
+ }
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = Interval {
+ start: *self.tree.starts.get_unchecked(self.current_idx),
+ end: *self.tree.ends.get_unchecked(self.current_idx),
+ data: self.tree.data.get_unchecked(self.current_idx).clone(),
+ };
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ }
+ }
+ None
+ }
+}
+
+pub struct KeyIterator<'a, T> {
+ tree: &'a IntervalMap,
+ current_idx: usize,
+ query_start: i32,
+}
+
+impl<'a, T: Clone> Iterator for KeyIterator<'a, T> {
+ type Item = (i32, i32);
+
+ fn next(&mut self) -> Option {
+ if self.current_idx == usize::MAX {
+ return None;
+ }
+ unsafe {
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = (
+ *self.tree.starts.get_unchecked(self.current_idx),
+ *self.tree.ends.get_unchecked(self.current_idx),
+ );
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ loop {
+ self.current_idx = *self.tree.branch.get_unchecked(self.current_idx);
+ if self.current_idx == usize::MAX {
+ break;
+ }
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = (
+ *self.tree.starts.get_unchecked(self.current_idx),
+ *self.tree.ends.get_unchecked(self.current_idx),
+ );
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ }
+ }
+ None
+ }
+}
+
+pub struct ValueIterator<'a, T> {
+ tree: &'a IntervalMap,
+ current_idx: usize,
+ query_start: i32,
+}
+
+impl<'a, T: Clone> Iterator for ValueIterator<'a, T> {
+ type Item = T;
+
+ fn next(&mut self) -> Option {
+ if self.current_idx == usize::MAX {
+ return None;
+ }
+ unsafe {
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = self.tree.data.get_unchecked(self.current_idx).clone();
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ loop {
+ self.current_idx = *self.tree.branch.get_unchecked(self.current_idx);
+ if self.current_idx == usize::MAX {
+ break;
+ }
+ if self.query_start <= *self.tree.ends.get_unchecked(self.current_idx) {
+ let value = self.tree.data.get_unchecked(self.current_idx).clone();
+ self.current_idx = self.current_idx.wrapping_sub(1);
+ return Some(value);
+ }
+ }
+ }
+ None
+ }
+}
+
+// Updated iterator creation methods for IntervalMap
+impl IntervalMap {
+ /// Returns an iterator over indices of intervals that intersect [start, end]
+ pub fn search_idxs_iter(&self, start: i32, end: i32) -> IndexIterator {
+ let current_idx = if self.starts.is_empty() {
+ usize::MAX
+ } else {
+ self.upper_bound(end)
+ };
+ IndexIterator {
+ tree: self,
+ current_idx,
+ query_start: start,
+ }
+ }
+
+ /// Returns an iterator over items (intervals with data) that intersect [start, end]
+ pub fn search_items_iter(&self, start: i32, end: i32) -> ItemIterator {
+ let current_idx = if self.starts.is_empty() {
+ usize::MAX
+ } else {
+ self.upper_bound(end)
+ };
+ ItemIterator {
+ tree: self,
+ current_idx,
+ query_start: start,
+ }
+ }
+
+ /// Returns an iterator over keys (interval start/end pairs) that intersect [start, end]
+ pub fn search_keys_iter(&self, start: i32, end: i32) -> KeyIterator {
+ let current_idx = if self.starts.is_empty() {
+ usize::MAX
+ } else {
+ self.upper_bound(end)
+ };
+ KeyIterator {
+ tree: self,
+ current_idx,
+ query_start: start,
+ }
+ }
+
+ /// Returns an iterator over values (data) of intervals that intersect [start, end]
+ pub fn search_values_iter(&self, start: i32, end: i32) -> ValueIterator {
+ let current_idx = if self.starts.is_empty() {
+ usize::MAX
+ } else {
+ self.upper_bound(end)
+ };
+ ValueIterator {
+ tree: self,
+ current_idx,
+ query_start: start,
+ }
+ }
+}
diff --git a/sequila/sequila-core/superintervals/src/superintervals/README.md b/sequila/sequila-core/superintervals/src/superintervals/README.md
new file mode 100644
index 0000000..f9c1c70
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals/README.md
@@ -0,0 +1,119 @@
+# Superintervals Python API
+
+Install using `pip install superintervals`
+
+```python
+from superintervals import IntervalMap
+from array import array
+
+# Method 1: Manual construction
+imap = IntervalMap()
+imap.add(10, 20, 'A')
+imap.add(15, 25, 'B')
+imap.build()
+results = imap.search_values(8, 20) # ['A', 'B']
+
+# Method 2: Efficient construction from arrays (no build() needed)
+starts = array('i', [10, 15, 30])
+ends = array('i', [20, 25, 40])
+values = ['A', 'B', 'C']
+imap = IntervalMap.from_arrays(starts, ends, values)
+results = imap.search_values(8, 20) # ['A', 'B']
+
+# Batch operations for high performance
+query_starts = array('i', [5, 18, 35])
+query_ends = array('i', [12, 22, 45])
+counts = imap.count_batch(query_starts, query_ends) # [1, 2, 1]
+indices = imap.search_idxs_batch(query_starts, query_ends) # [[0], [0, 1], [2]]
+```
+
+### API Reference
+
+**IntervalMap** class:
+
+#### Construction Methods
+- `IntervalMap()`
+ Create empty interval map
+
+- `IntervalMap.from_arrays(starts, ends, values=None)`
+ Create from arrays (array.array, numpy arrays, or lists)
+ Returns ready-to-use IntervalMap (no build() needed)
+
+#### Adding Intervals
+- `add(start, end, value=None)`
+ Add interval with associated value
+
+- `build()`
+ Build index (required before queries when using add())
+
+#### Memory Management
+- `clear()`
+ Remove all intervals
+
+- `reserve(n)`
+ Reserve space for n intervals
+
+- `size()`
+ Get number of intervals
+
+#### Access Methods
+- `at(index)`
+ Get interval at index as (start, end, value)
+
+- `starts_at(index)`
+ Get start position at index
+
+- `ends_at(index)`
+ Get end position at index
+
+- `data_at(index)`
+ Get value at index
+
+#### Single Query Methods
+- `has_overlaps(start, end)`
+ Check if any intervals overlap range
+
+- `count(start, end)`
+ Count overlapping intervals
+
+- `search_values(start, end)`
+ Get values of overlapping intervals
+
+- `search_idxs(start, end)`
+ Get indices of overlapping intervals
+
+- `search_keys(start, end)`
+ Get (start, end) pairs of overlapping intervals
+
+- `search_items(start, end)`
+ Get (start, end, value) tuples of overlapping intervals
+
+- `coverage(start, end)`
+ Get (count, total_coverage) for range
+
+#### Batch Query Methods (High Performance)
+- `count_batch(starts, ends)`
+ Count overlaps for multiple ranges
+ Args: Memory views (array.array, numpy arrays)
+ Returns: List of counts
+
+- `search_idxs_batch(starts, ends)`
+ Get indices for multiple ranges
+ Args: Memory views (array.array, numpy arrays)
+ Returns: List of lists containing indices
+
+- `search_values_batch(starts, ends)`
+ Get values for multiple ranges
+ Args: Memory views (array.array, numpy arrays)
+ Returns: List of lists containing values
+
+### Performance Tips
+
+- Use `IntervalMap.from_arrays()` for best construction performance
+- Use batch methods for multiple queries (often 5-10x faster)
+- Convert lists to arrays for batch operations:
+ ```python
+ from array import array
+ starts = array('i', [1, 5, 10]) # For batch methods
+ ```
+- Reserve space with `reserve(n)` when adding many intervals manually
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/superintervals/__init__.py b/sequila/sequila-core/superintervals/src/superintervals/__init__.py
new file mode 100644
index 0000000..d29a7f8
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals/__init__.py
@@ -0,0 +1,2 @@
+from .intervalmap import (
+ IntervalMap)
\ No newline at end of file
diff --git a/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pxd b/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pxd
new file mode 100644
index 0000000..31ea0d1
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pxd
@@ -0,0 +1,106 @@
+# distutils: language = c++
+from libcpp.vector cimport vector
+from libcpp.pair cimport pair
+
+cdef extern from "Python.h":
+ void Py_INCREF(object)
+ void Py_DECREF(object)
+
+
+cdef extern from "superintervals.hpp" namespace "si":
+
+ cdef cppclass Interval[S, T]:
+ S start, end
+ T data
+ Interval()
+ Interval(S s, S e, T d)
+
+
+ cdef cppclass CppIntervalMap "si::IntervalMap"[S, T]:
+ IntervalMap() except +
+
+ vector[S] starts, ends
+ vector[size_t] branch
+ vector[T] data
+ size_t idx
+
+ void clear()
+ void reserve(size_t n)
+ size_t size()
+ void add(S start, S end, const T& value)
+ void build()
+ void at(size_t index, Interval[S, T]& itv)
+ const Interval[S, T]& at(size_t index) const
+
+ # Search methods
+ void upper_bound(const S value) const
+ bint has_overlaps(const S start, const S end)
+ size_t count_linear(const S start, const S end)
+ size_t count(const S start, const S end)
+ size_t count_large(const S start, const S end)
+
+ void search_values(const S start, const S end, vector[T]& found)
+ void search_values_large(const S start, const S end, vector[T]& found)
+ void search_idxs(const S start, const S end, vector[size_t]& found)
+ void search_keys(const S start, const S end, vector[pair[S, S]]& found)
+ void search_items(const S start, const S end, vector[Interval[S, T]]& found)
+ void search_point(const S point, vector[T]& found)
+ void coverage(const S start, const S end, pair[size_t, S]& cov_result)
+
+ # Iterator classes not yet implemented!
+ # cppclass IndexIterator:
+ # IndexIterator(const IntervalMap * parent, size_t pos)
+ # size_t operator *() const
+ # IndexIterator& operator++()
+ # bint operator !=(const IndexIterator& other) const
+ # bint operator ==(const IndexIterator& other) const
+ #
+ # cppclass ItemIterator:
+ # ItemIterator(const IntervalMap * parent, size_t pos)
+ # Interval[S, T] operator *() const
+ # ItemIterator& operator++()
+ # bint operator !=(const ItemIterator& other) const
+ # bint operator ==(const ItemIterator& other) const
+ #
+ # cppclass IndexRange:
+ # IndexRange(const IntervalMap * parent, S start, S end)
+ # IndexIterator begin() const
+ # IndexIterator end() const
+ #
+ # cppclass ItemRange:
+ # ItemRange(const IntervalMap * parent, S start, S end)
+ # ItemIterator begin() const
+ # ItemIterator end() const
+ #
+ # IndexRange search_idxs(S start, S end) const
+ # ItemRange search_items(S start, S end) const
+
+
+
+# Type alias for Python object pointer
+ctypedef void * PyObjectPtr
+
+cdef class IntervalMap:
+ cdef CppIntervalMap[int, PyObjectPtr] * thisptr
+ cdef vector[PyObjectPtr] found_values
+ cdef vector[size_t] found_indexes
+
+ cpdef add(self, int start, int end, object value= *)
+ cpdef build(self)
+ cpdef at(self, int index)
+ cpdef starts_at(self, int index)
+ cpdef ends_at(self, int index)
+ cpdef data_at(self, int index)
+ cpdef clear(self)
+ cpdef reserve(self, size_t n)
+ cpdef size(self)
+ cpdef has_overlaps(self, int start, int end)
+ cpdef count(self, int start, int end)
+ cpdef search_values(self, int start, int end)
+ cpdef search_idxs(self, int start, int end)
+ cpdef search_keys(self, int start, int end)
+ cpdef search_items(self, int start, int end)
+ cpdef coverage(self, int start, int end)
+ cpdef count_batch(self, int[:] starts, int[:] ends)
+ cpdef search_idxs_batch(self, int[:] starts, int[:] ends)
+ cpdef search_values_batch(self, int[:] starts, int[:] ends)
diff --git a/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pyx b/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pyx
new file mode 100644
index 0000000..f80a2ca
--- /dev/null
+++ b/sequila/sequila-core/superintervals/src/superintervals/intervalmap.pyx
@@ -0,0 +1,479 @@
+
+from libcpp.pair cimport pair
+
+__all__ = ["IntervalMap"]
+
+
+cdef class IntervalMap:
+ """
+ SuperIntervals interval map to manage a collection of intervals with associated Python objects,
+ supporting operations such as adding intervals, checking overlaps, and querying stored data.
+ """
+
+ def __cinit__(self):
+ """
+ Initialize the IntervalMap.
+ """
+ self.thisptr = new CppIntervalMap[int, PyObjectPtr]()
+
+ def __dealloc__(self):
+ cdef PyObjectPtr obj_ptr
+ if self.thisptr:
+ for obj_ptr in self.thisptr.data:
+ if obj_ptr != NULL:
+ Py_DECREF(