diff --git a/Cargo.toml b/Cargo.toml index 457fc81..de7a897 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,13 +14,31 @@ categories = ["mathematics", "science", "algorithms"] [lib] bench = false +[features] +default = ["std"] +std = [] +parallel = ["dep:rayon", "std"] + [dependencies] -num-traits = "0.2" +num-traits = { version = "0.2", default-features = false, features = ["libm"] } +rayon = { version = "1", optional = true } [dev-dependencies] criterion = { version = "0.8", features = ["html_reports"] } approx = "0.5" +[[bench]] +name = "node_generation" +harness = false + +[[bench]] +name = "integration_1d" +harness = false + +[[bench]] +name = "cubature" +harness = false + [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] diff --git a/README.md b/README.md index f7579de..4dcac00 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,12 @@ A high-performance numerical quadrature (integration) library for Rust. -- **Gauss-Legendre** -- O(1) per-node generation via the Bogaert (2014) algorithm -- **Gauss-Kronrod** -- embedded pairs (G7-K15 through G25-K51) for error estimation +- **Comprehensive** -- Gauss-Legendre, Gauss-Kronrod, Jacobi, Hermite, Laguerre, Chebyshev, Radau, Lobatto, Clenshaw-Curtis, tanh-sinh, oscillatory, Cauchy PV +- **Adaptive integration** -- QUADPACK-style error-driven refinement with configurable GK pairs +- **Multi-dimensional** -- tensor products, Smolyak sparse grids, Genz-Malik adaptive cubature, quasi-Monte Carlo (Sobol, Halton) - **Generic over `F: Float`** -- works with `f32`, `f64`, and AD types (e.g., [echidna](https://github.com/Entrolution/echidna)) +- **`no_std` compatible** -- works without the standard library (with `alloc`) +- **Optional parallelism** -- `parallel` feature for rayon-based `_par` methods - **Precomputed rules** -- generate nodes/weights once, integrate many times with zero allocation ## Quick Start @@ -22,37 +25,53 @@ Add to `Cargo.toml`: bilby = "0.1" ``` -```rust,ignore +```rust 
use bilby::GaussLegendre; // Create a 10-point Gauss-Legendre rule -let gl = GaussLegendre::new(10); +let gl = GaussLegendre::new(10).unwrap(); -// Integrate x^2 over [0, 1] -let result = gl.integrate(0.0, 1.0, |x| x * x); +// Integrate x^2 over [0, 1] (exact result = 1/3) +let result = gl.rule().integrate(0.0, 1.0, |x: f64| x * x); assert!((result - 1.0 / 3.0).abs() < 1e-14); ``` -## Roadmap +## Features -bilby is being developed in phases. See [ROADMAP.md](ROADMAP.md) for details. +| Feature | Default | Description | +|---------|---------|-------------| +| `std` | Yes | Enables `std::error::Error` impl and `cache` module | +| `parallel` | No | Enables rayon-based `_par` methods (implies `std`) | -| Phase | Version | Content | -|-------|---------|---------| -| 0 | v0.1.0 | Gauss-Legendre (Bogaert), Gauss-Kronrod pairs, fixed-order integration | -| 1 | v0.2.0 | Adaptive integration (QUADPACK-style), infinite domains, error control | -| 2 | v0.3.0 | Classical rule families (Jacobi, Laguerre, Hermite, Chebyshev, Radau, Lobatto, Clenshaw-Curtis) | -| 3 | v0.4.0 | Multi-dimensional (tensor product, sparse grids, adaptive cubature, quasi-Monte Carlo) | -| 4 | v0.5.0 | Specialty methods (oscillatory, tanh-sinh, Cauchy principal value) | -| 5 | v1.0.0 | Parallelism, caching, `no_std`, benchmarks | +### `no_std` support + +bilby works in `no_std` environments (with `alloc`). Disable default features: + +```toml +[dependencies] +bilby = { version = "0.1", default-features = false } +``` + +### Parallelism + +Enable the `parallel` feature for parallel variants of integration methods: + +```toml +[dependencies] +bilby = { version = "0.1", features = ["parallel"] } +``` + +This provides `integrate_composite_par`, `integrate_par`, `integrate_box_par`, +`MonteCarloIntegrator::integrate_par`, and `GaussLegendre::new_par`. 
## Development ```bash -cargo test # Run tests -cargo bench # Run benchmarks -cargo clippy # Lint -cargo fmt # Format +cargo test # Run tests (default features) +cargo test --all-features # Run tests including parallel +cargo test --no-default-features # Run tests in no_std mode +cargo bench # Run benchmarks +cargo clippy --all-features # Lint ``` ## License diff --git a/ROADMAP.md b/ROADMAP.md deleted file mode 100644 index df59ab7..0000000 --- a/ROADMAP.md +++ /dev/null @@ -1,220 +0,0 @@ -# bilby — Numerical Quadrature for Rust - -A high-performance, comprehensive numerical integration library for Rust. - -**Design principles:** -- Generic over `F: Float` (composable with echidna AD types out of the box) -- No heap allocation on hot paths where possible -- Precomputable rules: separate node/weight generation from integration -- Consistent API: all rules return `(nodes, weights)` or `Result` -- Feature-gated heavy dependencies (rayon, etc.) - -## Phase 0: Foundation - -Core types, trait design, and the first two rule families. This phase defines the API that everything else builds on. 
- -### 0.1 Project scaffolding ✅ -- `cargo init --lib`, dual MIT/Apache-2.0, MSRV 1.93 -- CI (clippy, fmt, test, MSRV check, coverage, security audit, publish workflow) -- `QuadratureRule` struct with `nodes: Vec`, `weights: Vec` -- `QuadratureError` enum (ZeroOrder, DegenerateInterval, NotConverged) - -### 0.2 Gauss-Legendre (Bogaert algorithm) ✅ -- Newton iteration on Legendre recurrence for n <= 100 (Tricomi initial guess) -- Bogaert (2014) asymptotic expansion for n > 100 (O(1) per node via Bessel zeros + Chebyshev interpolants) -- `GaussLegendre::new(n) -> Result` -- 16-digit accuracy verified against exact polynomial integration up to degree 2n-1 - -### 0.3 Gauss-Kronrod (embedded pairs) ✅ -- G7-K15, G10-K21, G15-K31, G20-K41, G25-K51 pairs -- Canonical QUADPACK coefficients (from netlib Fortran source) -- Returns `(estimate, error_estimate)` with QUADPACK error heuristic -- `GaussKronrod::new(pair)` with static coefficient slices (no allocation) - -### 0.4 Basic integration API ✅ -- `QuadratureRule::integrate(a, b, f)` — affine transform from [-1, 1] to [a, b] -- `QuadratureRule::integrate_composite(a, b, n_panels, f)` — composite rule -- `GaussKronrod::integrate(a, b, f)` — returns (estimate, error_estimate) -- 22 unit tests + 4 doc tests - -**Milestone: v0.1.0** — Gauss-Legendre + Gauss-Kronrod, fixed-order integration. - -## Phase 1: Adaptive Integration - -The workhorse feature. Most real integration problems need error-driven refinement. 
- -### 1.1 Global adaptive subdivision (QUADPACK-style) ✅ -- `BinaryHeap`-based priority queue of subintervals ordered by error estimate -- Bisects the interval with largest error, re-evaluates both halves via GK pairs -- Terminates when `error <= max(abs_tol, rel_tol * |estimate|)` or max evaluations reached -- `AdaptiveIntegrator` builder with configurable GK pair, tolerances, and eval budget -- `adaptive_integrate(f, a, b, tol)` convenience function - -### 1.2 Singularity handling ✅ -- User-specified break points via `adaptive_integrate_with_breaks(f, a, b, breaks, tol)` -- Break points seed the priority queue as separate sub-intervals for global adaptive refinement -- Validation: breaks must be within (a, b), deduplicated and sorted automatically -- Endpoint singularity detection deferred to future refinement - -### 1.3 Semi-infinite and infinite intervals ✅ -- Domain transforms: [a, ∞) via x = a + t/(1-t), (-∞, b] via x = b - t/(1-t), (-∞, ∞) via x = t/(1-t²) -- `integrate_semi_infinite_upper(f, a, tol)`, `integrate_semi_infinite_lower(f, b, tol)`, `integrate_infinite(f, tol)` -- Delegates to adaptive quadrature on the transformed finite domain -- Gauss-Laguerre/Hermite as dedicated rule families deferred to Phase 2 - -### 1.4 Error reporting ✅ -- `QuadratureResult` struct: value, error_estimate, num_evals, converged flag -- Non-convergence signalled via `converged: false` (always returns best estimate) -- `QuadratureError` simplified: `NotConverged` removed, `InvalidInput` added -- 43 unit tests + 13 doc tests - -**Milestone: v0.2.0** — Adaptive integration with error control, infinite domains. - -## Phase 2: Classical Rule Families - -Complete the Gaussian quadrature family. 
- -### 2.1 Gauss-Jacobi ✅ -- Weight function (1-x)^alpha * (1+x)^beta on [-1, 1] -- Generalises Legendre (alpha=beta=0), Chebyshev (alpha=beta=-0.5), Gegenbauer -- Golub-Welsch algorithm: eigenvalues of symmetric tridiagonal Jacobi matrix -- Handles all valid parameters including the singular Chebyshev limit (alpha+beta=-1) - -### 2.2 Gauss-Laguerre (generalised) ✅ -- Weight function x^alpha * e^(-x) on [0, inf) -- Golub-Welsch algorithm via Laguerre three-term recurrence coefficients - -### 2.3 Gauss-Hermite ✅ -- Weight function e^(-x^2) on (-inf, inf) -- Golub-Welsch algorithm via physicists' Hermite recurrence coefficients - -### 2.4 Gauss-Chebyshev (types I and II) ✅ -- Closed-form nodes and weights (no iteration needed) -- Useful as comparison/baseline and for Chebyshev interpolation - -### 2.5 Gauss-Radau and Gauss-Lobatto ✅ -- Gauss-Radau: Golub-Welsch with Radau modification of Legendre Jacobi matrix -- Gauss-Lobatto: Newton on P'_{n-1}(x) using Legendre ODE second derivative -- Left/right variants for Radau, both endpoints for Lobatto - -### 2.6 Clenshaw-Curtis ✅ -- Nodes at Chebyshev extrema cos(kπ/(n-1)), weights via explicit DCT formula -- Nested: degree doubling reuses previous evaluations -- Competitive with Gauss for smooth functions, better for adaptive refinement - -### 2.7 Golub-Welsch eigenvalue solver ✅ -- Shared `pub(crate)` module: symmetric tridiagonal QL algorithm with implicit shifts -- Radau modification via continued fraction on characteristic polynomial -- Used by Gauss-Jacobi, Gauss-Hermite, Gauss-Laguerre, and Gauss-Radau -- 98 unit tests + 21 doc tests - -**Milestone: v0.3.0** — Full classical quadrature family. 
- -## Phase 3: Multi-Dimensional Integration - -### 3.1 Tensor product rules ✅ -- `TensorProductRule::new(&[&QuadratureRule])` for mixed-order N-D rules -- `TensorProductRule::isotropic(rule, dim)` for same rule in all dimensions -- `CubatureRule` with flat node storage, `integrate()` and `integrate_box()` methods -- Simple but exponential cost (curse of dimensionality); practical for d <= 4-5 - -### 3.2 Sparse grids (Smolyak) ✅ -- Smolyak combination technique from nested Clenshaw-Curtis rules -- `SparseGrid::clenshaw_curtis(dim, level)` with quantised point merging -- Practical for moderate dimensions (d <= ~20 for smooth functions) -- Verified: correct point counts, weight sums, polynomial exactness - -### 3.3 Adaptive cubature (Genz-Malik) ✅ -- h-adaptive: `BinaryHeap`-based subdivision of worst subregion -- Genz-Malik degree-7/5 embedded rule pair for error estimation -- Fourth-difference criterion for split axis selection -- `adaptive_cubature(f, lower, upper, tol)` convenience function -- d=1 delegates to 1D adaptive integrator - -### 3.4 Monte Carlo / quasi-Monte Carlo ✅ -- Plain pseudo-random MC with Welford's online variance (Xoshiro256++ PRNG) -- Quasi-MC: Sobol sequences (gray-code enumeration, 40 dimensions, Joe-Kuo direction numbers) -- Quasi-MC: Halton sequences (radical-inverse function, 100 primes) -- `MonteCarloIntegrator` builder with `Plain`/`Sobol`/`Halton` methods -- Heuristic error estimate for QMC via N/2 vs N comparison -- 136 unit tests + 28 doc tests - -**Milestone: v0.4.0** — Multi-dimensional integration. 
- -## Phase 4: Specialty Methods - -### 4.1 Oscillatory integrals ✅ -- Filon-Clenshaw-Curtis method for ∫f(x)sin(ωx)dx and ∫f(x)cos(ωx)dx -- Chebyshev expansion of f at Clenshaw-Curtis nodes -- Modified Chebyshev moments computed via Gauss-Legendre quadrature -- Phase-shifted combination for sin/cos kernels; small-ω fallback to adaptive -- `OscillatoryIntegrator` builder + `integrate_oscillatory_sin/cos` convenience functions - -### 4.2 Double-exponential (tanh-sinh) ✅ -- Transform x = tanh(π/2·sinh(t)) converts endpoint singularities to rapid decay -- Self-adaptive via level doubling: each level halves step size, reuses all prior points -- Handles algebraic (x^{-0.75}) and logarithmic (ln x) endpoint singularities -- `TanhSinh` builder + `tanh_sinh_integrate` convenience function - -### 4.3 Cauchy principal value ✅ -- PV ∫f(x)/(x-c)dx = ∫[f(x)-f(c)]/(x-c)dx + f(c)·ln((b-c)/(c-a)) -- Subtraction technique with adaptive integration of the regularised remainder -- Automatic breakpoint at the pole via `adaptive_integrate_with_breaks` -- `CauchyPV` builder + `pv_integrate` convenience function - -### 4.4 Weighted integration ✅ -- Unified API: `∫f(x)·w(x)dx` dispatching to appropriate Gaussian rule -- Jacobi, Laguerre, Hermite, Chebyshev I/II weight families -- LogWeight w(x)=-ln(x) via Gauss-Laguerre α=1 substitution (x=e^{-t}) -- `WeightedIntegrator` builder + `weighted_integrate` convenience function -- `integrate_over(a, b, f)` for affine-mapped finite domains -- 172 unit tests + 35 doc tests - -**Milestone: v0.5.0** — Specialty quadrature. 
- -## Phase 5: Performance and Polish - -### 5.1 Parallelism (`parallel` feature) -- Parallel adaptive subdivision (evaluate subintervals concurrently) -- Parallel node/weight generation for large n -- Vectorised integrand batching (evaluate f at multiple points per call) - -### 5.2 Precomputed rule caching -- `LazyRule` / compile-time tables for common orders -- Avoid recomputing nodes/weights in hot loops - -### 5.3 `no_std` core -- Core rule computations without alloc where feasible -- Fixed-size rules via const generics - -### 5.4 Comprehensive benchmarks -- Against gauss-quad, quadrature, quad_gk -- Against SciPy/QUADPACK reference values -- Criterion benchmarks for node generation, integration, adaptive convergence - -**Milestone: v1.0.0** — Production-ready. - -## Non-Goals (at least initially) - -- ODE/PDE solvers (different domain) -- Symbolic integration -- Arbitrary-precision arithmetic (stay in f32/f64 land) -- GPU acceleration (diminishing returns for quadrature vs. the integrand evaluation itself) - -## Key Design Decisions to Make Early - -1. **Trait bounds**: `F: Float` (num-traits) vs custom trait. Leaning toward num-traits `Float` for echidna compatibility. -2. **Node/weight storage**: `Vec` vs `&[F]` vs const-generic arrays. Likely `Vec` for flexibility with a `SmallRule` const-generic variant for known small sizes. -3. **Integrand signature**: `Fn(F) -> F` vs `Fn(&[F]) -> F` for multi-dimensional. Probably separate 1D and N-D APIs rather than a unified signature. -4. **Error model**: return `(value, error)` tuple vs a `QuadratureResult` struct. Struct is more extensible (can add num_evals, converged flag later). -5. **Rule representation**: separate types per family (GaussLegendre, GaussKronrod, etc.) vs enum. Separate types with a shared trait — more ergonomic and allows family-specific methods. 
- -## References - -- Bogaert (2014) — [Iteration-Free Computation of Gauss-Legendre Quadrature Nodes and Weights](https://www.cfm.brown.edu/faculty/gk/APMA2560/Handouts/GL_quad_Bogaert_2014.pdf) -- QUADPACK — Piessens, de Doncker-Kapenga, Uberhuber, Kahaner (1983) -- [FastGaussQuadrature.jl](https://github.com/JuliaApproximation/FastGaussQuadrature.jl) — Julia reference implementation -- [cubature](https://github.com/stevengj/cubature) — Steven G. Johnson's adaptive cubature in C -- Genz & Malik (1980) — adaptive cubature rules for hypercubes -- [SciPy integrate](https://docs.scipy.org/doc/scipy/tutorial/integrate.html) — Python QUADPACK wrapper -- [sparse-grids.de](http://www.sparse-grids.de/) — Sparse grid reference tables and theory diff --git a/benches/cubature.rs b/benches/cubature.rs new file mode 100644 index 0000000..344995d --- /dev/null +++ b/benches/cubature.rs @@ -0,0 +1,130 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::cubature::{ + adaptive_cubature, monte_carlo_integrate, MCMethod, MonteCarloIntegrator, SparseGrid, + TensorProductRule, +}; +use bilby::GaussLegendre; + +fn bench_tensor_product(c: &mut Criterion) { + let mut group = c.benchmark_group("tensor_product"); + + // d=2, n=10 + let gl10 = GaussLegendre::new(10).unwrap(); + let tp_2d = TensorProductRule::isotropic(gl10.rule(), 2).unwrap(); + group.bench_function("d2_n10", |b| { + b.iter(|| { + tp_2d + .rule() + .integrate_box(&[0.0, 0.0], &[1.0, 1.0], |x| x[0] * x[1]) + }); + }); + + // d=3, n=5 + let gl5 = GaussLegendre::new(5).unwrap(); + let tp_3d = TensorProductRule::isotropic(gl5.rule(), 3).unwrap(); + group.bench_function("d3_n5", |b| { + b.iter(|| { + tp_3d + .rule() + .integrate_box(&[0.0, 0.0, 0.0], &[1.0, 1.0, 1.0], |x| x[0] * x[1] * x[2]) + }); + }); + + group.finish(); +} + +fn bench_sparse_grid(c: &mut Criterion) { + let mut group = c.benchmark_group("sparse_grid"); + + let sg_3_3 = SparseGrid::clenshaw_curtis(3, 3).unwrap(); + 
group.bench_function("d3_l3", |b| { + b.iter(|| { + sg_3_3 + .rule() + .integrate_box(&[0.0; 3], &[1.0; 3], |x| (x[0] + x[1] + x[2]).exp()) + }); + }); + + let sg_5_3 = SparseGrid::clenshaw_curtis(5, 3).unwrap(); + group.bench_function("d5_l3", |b| { + b.iter(|| { + sg_5_3.rule().integrate_box(&[0.0; 5], &[1.0; 5], |x| { + (x[0] + x[1] + x[2] + x[3] + x[4]).exp() + }) + }); + }); + + group.finish(); +} + +fn bench_adaptive_cubature(c: &mut Criterion) { + let mut group = c.benchmark_group("adaptive_cubature"); + + group.bench_function("d2_gaussian", |b| { + b.iter(|| { + adaptive_cubature( + |x| (-x[0] * x[0] - x[1] * x[1]).exp(), + &[0.0, 0.0], + &[1.0, 1.0], + 1e-6, + ) + .unwrap() + }); + }); + + group.bench_function("d3_polynomial", |b| { + b.iter(|| { + adaptive_cubature( + |x| x[0] * x[0] + x[1] * x[1] + x[2] * x[2], + &[0.0, 0.0, 0.0], + &[1.0, 1.0, 1.0], + 1e-8, + ) + .unwrap() + }); + }); + + group.finish(); +} + +fn bench_monte_carlo(c: &mut Criterion) { + let mut group = c.benchmark_group("monte_carlo"); + + let f = |x: &[f64]| (x[0] * x[1] * x[2]).exp(); + let lower = [0.0; 3]; + let upper = [1.0; 3]; + let n = 10_000; + + for (name, method) in [ + ("plain", MCMethod::Plain), + ("sobol", MCMethod::Sobol), + ("halton", MCMethod::Halton), + ] { + group.bench_with_input(BenchmarkId::new("3d_exp", name), &method, |b, &method| { + b.iter(|| { + MonteCarloIntegrator::default() + .with_method(method) + .with_samples(n) + .integrate(&lower, &upper, &f) + .unwrap() + }); + }); + } + + // Also benchmark the convenience function + group.bench_function("sobol_convenience_10k", |b| { + b.iter(|| monte_carlo_integrate(&f, &lower, &upper, n).unwrap()); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_tensor_product, + bench_sparse_grid, + bench_adaptive_cubature, + bench_monte_carlo, +); +criterion_main!(benches); diff --git a/benches/integration_1d.rs b/benches/integration_1d.rs new file mode 100644 index 0000000..124559a --- /dev/null +++ 
b/benches/integration_1d.rs @@ -0,0 +1,110 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::{ + adaptive_integrate, tanh_sinh_integrate, GKPair, GaussKronrod, GaussLegendre, + OscillatoryIntegrator, OscillatoryKernel, +}; + +fn bench_fixed_order_gl(c: &mut Criterion) { + let mut group = c.benchmark_group("fixed_gl_sin"); + for n in [10, 20, 50] { + let gl = GaussLegendre::new(n).unwrap(); + group.bench_with_input(BenchmarkId::from_parameter(n), &gl, |b, gl| { + b.iter(|| { + gl.rule() + .integrate(0.0, std::f64::consts::PI, |x: f64| x.sin()) + }); + }); + } + group.finish(); +} + +fn bench_composite_gl(c: &mut Criterion) { + let gl = GaussLegendre::new(10).unwrap(); + let mut group = c.benchmark_group("composite_gl_sin"); + for panels in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::from_parameter(panels), + &panels, + |b, &panels| { + b.iter(|| { + gl.rule() + .integrate_composite(0.0, std::f64::consts::PI, panels, |x: f64| x.sin()) + }); + }, + ); + } + group.finish(); +} + +fn bench_gk_pairs(c: &mut Criterion) { + let mut group = c.benchmark_group("gk_sin"); + for (name, pair) in [("G7K15", GKPair::G7K15), ("G15K31", GKPair::G15K31)] { + let gk = GaussKronrod::new(pair); + group.bench_function(name, |b| { + b.iter(|| gk.integrate(0.0, std::f64::consts::PI, |x: f64| x.sin())); + }); + } + group.finish(); +} + +fn bench_adaptive(c: &mut Criterion) { + let mut group = c.benchmark_group("adaptive"); + group.bench_function("smooth_sin", |b| { + b.iter(|| adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap()); + }); + group.bench_function("peaked", |b| { + b.iter(|| { + adaptive_integrate( + |x: f64| 1.0 / (1.0 + (x - 0.3).powi(2) * 1e4), + 0.0, + 1.0, + 1e-8, + ) + .unwrap() + }); + }); + group.bench_function("singular_sqrt", |b| { + b.iter(|| adaptive_integrate(|x: f64| 1.0 / x.sqrt(), 1e-15, 1.0, 1e-8).unwrap()); + }); + group.finish(); +} + +fn bench_tanh_sinh(c: &mut Criterion) { + 
let mut group = c.benchmark_group("tanh_sinh"); + group.bench_function("smooth_sin", |b| { + b.iter(|| tanh_sinh_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap()); + }); + group.bench_function("singular_sqrt", |b| { + b.iter(|| tanh_sinh_integrate(|x: f64| 1.0 / x.sqrt(), 0.0, 1.0, 1e-8).unwrap()); + }); + group.finish(); +} + +fn bench_oscillatory(c: &mut Criterion) { + let mut group = c.benchmark_group("oscillatory"); + for omega in [10.0, 100.0] { + group.bench_with_input( + BenchmarkId::new("sin", omega as u64), + &omega, + |b, &omega| { + let integrator = OscillatoryIntegrator::new(OscillatoryKernel::Sine, omega) + .with_order(64) + .with_abs_tol(1e-10); + b.iter(|| integrator.integrate(0.0, 1.0, |_| 1.0).unwrap()); + }, + ); + } + group.finish(); +} + +criterion_group!( + benches, + bench_fixed_order_gl, + bench_composite_gl, + bench_gk_pairs, + bench_adaptive, + bench_tanh_sinh, + bench_oscillatory, +); +criterion_main!(benches); diff --git a/benches/node_generation.rs b/benches/node_generation.rs new file mode 100644 index 0000000..fa43490 --- /dev/null +++ b/benches/node_generation.rs @@ -0,0 +1,56 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::{ClenshawCurtis, GaussHermite, GaussJacobi, GaussLaguerre, GaussLegendre}; + +fn bench_gl_newton(c: &mut Criterion) { + let mut group = c.benchmark_group("gl_newton"); + for n in [10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| GaussLegendre::new(n).unwrap()); + }); + } + group.finish(); +} + +fn bench_gl_bogaert(c: &mut Criterion) { + let mut group = c.benchmark_group("gl_bogaert"); + for n in [200, 1_000, 10_000, 100_000] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| GaussLegendre::new(n).unwrap()); + }); + } + group.finish(); +} + +fn bench_golub_welsch(c: &mut Criterion) { + let mut group = c.benchmark_group("golub_welsch"); + 
group.bench_function("jacobi_n50", |b| { + b.iter(|| GaussJacobi::new(50, 0.5, 0.5).unwrap()); + }); + group.bench_function("hermite_n50", |b| { + b.iter(|| GaussHermite::new(50).unwrap()); + }); + group.bench_function("laguerre_n50", |b| { + b.iter(|| GaussLaguerre::new(50, 0.0).unwrap()); + }); + group.finish(); +} + +fn bench_clenshaw_curtis(c: &mut Criterion) { + let mut group = c.benchmark_group("clenshaw_curtis"); + for n in [33, 65, 129] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| ClenshawCurtis::new(n).unwrap()); + }); + } + group.finish(); +} + +criterion_group!( + benches, + bench_gl_newton, + bench_gl_bogaert, + bench_golub_welsch, + bench_clenshaw_curtis, +); +criterion_main!(benches); diff --git a/src/adaptive.rs b/src/adaptive.rs index b466fb6..124258d 100644 --- a/src/adaptive.rs +++ b/src/adaptive.rs @@ -10,12 +10,18 @@ //! ``` //! use bilby::adaptive_integrate; //! -//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap(); +//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, core::f64::consts::PI, 1e-10).unwrap(); //! assert!((result.value - 2.0).abs() < 1e-10); //! assert!(result.is_converged()); //! ``` -use std::cmp::Ordering; +use core::cmp::Ordering; + +#[cfg(not(feature = "std"))] +use alloc::collections::BinaryHeap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(feature = "std")] use std::collections::BinaryHeap; use crate::error::QuadratureError; @@ -372,7 +378,7 @@ mod tests { /// Integral of sin(x) over [0, pi] = 2. #[test] fn sin_integral() { - let r = adaptive_integrate(f64::sin, 0.0, std::f64::consts::PI, 1e-12).unwrap(); + let r = adaptive_integrate(f64::sin, 0.0, core::f64::consts::PI, 1e-12).unwrap(); assert!(r.converged); assert!((r.value - 2.0).abs() < 1e-12, "value={}", r.value); } @@ -389,8 +395,8 @@ mod tests { /// Reversed bounds should negate the result. 
#[test] fn reversed_bounds() { - let forward = adaptive_integrate(f64::sin, 0.0, std::f64::consts::PI, 1e-12).unwrap(); - let reverse = adaptive_integrate(f64::sin, std::f64::consts::PI, 0.0, 1e-12).unwrap(); + let forward = adaptive_integrate(f64::sin, 0.0, core::f64::consts::PI, 1e-12).unwrap(); + let reverse = adaptive_integrate(f64::sin, core::f64::consts::PI, 0.0, 1e-12).unwrap(); assert!( (forward.value + reverse.value).abs() < 1e-12, "forward={}, reverse={}", @@ -450,7 +456,7 @@ mod tests { .with_abs_tol(1e-14) .with_rel_tol(1e-14) .with_max_evals(100_000) - .integrate(0.0, std::f64::consts::PI, f64::sin) + .integrate(0.0, core::f64::consts::PI, f64::sin) .unwrap(); assert!(r.converged, "err={}", r.error_estimate); assert!((r.value - 2.0).abs() < 1e-14, "value={}", r.value); diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..139abce --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,36 @@ +//! Precomputed quadrature rule cache. +//! +//! Provides lazily-initialised, globally-shared instances of common +//! Gauss-Legendre rules. Rules are computed once on first access and +//! reused for the lifetime of the process. +//! +//! # Example +//! +//! ``` +//! use bilby::cache::GL10; +//! +//! let result = GL10.rule().integrate(0.0, 1.0, |x: f64| x * x); +//! assert!((result - 1.0 / 3.0).abs() < 1e-14); +//! ``` + +use std::sync::LazyLock; + +use crate::gauss_legendre::GaussLegendre; + +/// Precomputed 5-point Gauss-Legendre rule. +pub static GL5: LazyLock = LazyLock::new(|| GaussLegendre::new(5).unwrap()); + +/// Precomputed 10-point Gauss-Legendre rule. +pub static GL10: LazyLock = LazyLock::new(|| GaussLegendre::new(10).unwrap()); + +/// Precomputed 15-point Gauss-Legendre rule. +pub static GL15: LazyLock = LazyLock::new(|| GaussLegendre::new(15).unwrap()); + +/// Precomputed 20-point Gauss-Legendre rule. +pub static GL20: LazyLock = LazyLock::new(|| GaussLegendre::new(20).unwrap()); + +/// Precomputed 50-point Gauss-Legendre rule. 
+pub static GL50: LazyLock = LazyLock::new(|| GaussLegendre::new(50).unwrap()); + +/// Precomputed 100-point Gauss-Legendre rule. +pub static GL100: LazyLock = LazyLock::new(|| GaussLegendre::new(100).unwrap()); diff --git a/src/cauchy_pv.rs b/src/cauchy_pv.rs index c45ed71..d6a6e80 100644 --- a/src/cauchy_pv.rs +++ b/src/cauchy_pv.rs @@ -18,6 +18,9 @@ use crate::adaptive::AdaptiveIntegrator; use crate::error::QuadratureError; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Builder for Cauchy principal value integration. /// /// Computes PV ∫ₐᵇ f(x)/(x-c) dx via the subtraction technique: diff --git a/src/clenshaw_curtis.rs b/src/clenshaw_curtis.rs index ae1a9c1..f6a5971 100644 --- a/src/clenshaw_curtis.rs +++ b/src/clenshaw_curtis.rs @@ -12,6 +12,11 @@ use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Clenshaw-Curtis quadrature rule on \[-1, 1\]. /// /// # Example @@ -45,21 +50,25 @@ impl ClenshawCurtis { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -70,7 +79,7 @@ impl ClenshawCurtis { /// For n == 1: midpoint rule (x=0, w=2). /// For n >= 2: nodes at x_k = cos(k π / (n-1)), weights via explicit formula. 
fn compute_clenshaw_curtis(n: usize) -> (Vec, Vec) { - use std::f64::consts::PI; + use core::f64::consts::PI; if n == 1 { return (vec![0.0], vec![2.0]); @@ -209,7 +218,7 @@ mod tests { #[test] fn sin_integration() { let cc = ClenshawCurtis::new(21).unwrap(); - let result = cc.rule().integrate(0.0, std::f64::consts::PI, f64::sin); + let result = cc.rule().integrate(0.0, core::f64::consts::PI, f64::sin); assert!((result - 2.0).abs() < 1e-12, "result={result}"); } diff --git a/src/cubature/adaptive.rs b/src/cubature/adaptive.rs index b4d2da0..ddd4549 100644 --- a/src/cubature/adaptive.rs +++ b/src/cubature/adaptive.rs @@ -7,7 +7,15 @@ //! Practical for dimensions d ≤ ~7 (the rule uses 2^d vertex evaluations). //! For d = 1, delegates to the 1D adaptive integrator. -use std::cmp::Ordering; +use core::cmp::Ordering; + +#[cfg(not(feature = "std"))] +use alloc::collections::BinaryHeap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; +#[cfg(feature = "std")] use std::collections::BinaryHeap; use crate::error::QuadratureError; @@ -426,7 +434,7 @@ mod tests { #[test] fn delegates_to_1d() { let result = - adaptive_cubature(|x| x[0].sin(), &[0.0], &[std::f64::consts::PI], 1e-10).unwrap(); + adaptive_cubature(|x| x[0].sin(), &[0.0], &[core::f64::consts::PI], 1e-10).unwrap(); assert!((result.value - 2.0).abs() < 1e-10, "value={}", result.value); } diff --git a/src/cubature/halton.rs b/src/cubature/halton.rs index 0e520b8..38b20d4 100644 --- a/src/cubature/halton.rs +++ b/src/cubature/halton.rs @@ -5,6 +5,9 @@ use crate::error::QuadratureError; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; + /// First 100 primes for Halton sequence bases. 
const PRIMES: [u32; 100] = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, @@ -93,6 +96,9 @@ fn radical_inverse(mut n: u64, base: u32) -> f64 { mod tests { use super::*; + #[cfg(not(feature = "std"))] + use alloc::vec; + #[test] fn first_few_points() { let mut hal = HaltonSequence::new(2).unwrap(); diff --git a/src/cubature/mod.rs b/src/cubature/mod.rs index 18b0e4c..2d583c2 100644 --- a/src/cubature/mod.rs +++ b/src/cubature/mod.rs @@ -38,6 +38,9 @@ pub use monte_carlo::{monte_carlo_integrate, MCMethod, MonteCarloIntegrator}; pub use sparse_grid::{SparseGrid, SparseGridBasis}; pub use tensor::TensorProductRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// A precomputed multi-dimensional cubature rule. /// /// Nodes are stored in a flat layout: node `i` occupies @@ -69,26 +72,31 @@ impl CubatureRule { } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.weights.len() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.dim } /// Access the i-th node as a slice of length `dim`. + #[inline] pub fn node(&self, i: usize) -> &[f64] { &self.nodes[i * self.dim..(i + 1) * self.dim] } /// The weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.weights } /// Integrate `f` over the reference domain (assumes nodes are on \[-1, 1\]^d). + #[inline] pub fn integrate(&self, f: G) -> f64 where G: Fn(&[f64]) -> f64, @@ -126,6 +134,54 @@ impl CubatureRule { } sum * jacobian } + + /// Parallel integration over the reference domain \[-1, 1\]^d. + /// + /// Identical to [`integrate`](Self::integrate) but evaluates points in parallel. 
+ #[cfg(feature = "parallel")] + pub fn integrate_par(&self, f: G) -> f64 + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + let d = self.dim; + (0..self.num_points()) + .into_par_iter() + .map(|i| self.weights[i] * f(&self.nodes[i * d..(i + 1) * d])) + .sum() + } + + /// Parallel integration over the hyperrectangle \[lower, upper\]. + /// + /// Identical to [`integrate_box`](Self::integrate_box) but evaluates points in parallel. + #[cfg(feature = "parallel")] + pub fn integrate_box_par(&self, lower: &[f64], upper: &[f64], f: G) -> f64 + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + assert_eq!(lower.len(), self.dim); + assert_eq!(upper.len(), self.dim); + + let d = self.dim; + let half_widths: Vec = (0..d).map(|j| 0.5 * (upper[j] - lower[j])).collect(); + let midpoints: Vec = (0..d).map(|j| 0.5 * (lower[j] + upper[j])).collect(); + let jacobian: f64 = half_widths.iter().product(); + + let sum: f64 = (0..self.num_points()) + .into_par_iter() + .map(|i| { + let node = &self.nodes[i * d..(i + 1) * d]; + let x: Vec = (0..d) + .map(|j| half_widths[j] * node[j] + midpoints[j]) + .collect(); + self.weights[i] * f(&x) + }) + .sum(); + sum * jacobian + } } #[cfg(test)] @@ -149,4 +205,34 @@ mod tests { let result = rule.integrate_box(&[0.0, 0.0], &[2.0, 3.0], |_| 1.0); assert!((result - 6.0).abs() < 1e-14); // area = 2*3 = 6 } + + /// Parallel integrate matches sequential on reference domain. + #[cfg(feature = "parallel")] + #[test] + fn integrate_par_matches_sequential() { + use crate::cubature::TensorProductRule; + use crate::GaussLegendre; + + let gl = GaussLegendre::new(10).unwrap(); + let tp = TensorProductRule::isotropic(gl.rule(), 3).unwrap(); + let f = |x: &[f64]| (x[0] * x[1] + x[2]).sin(); + let seq = tp.rule().integrate(&f); + let par = tp.rule().integrate_par(&f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } + + /// Parallel integrate_box matches sequential. 
+ #[cfg(feature = "parallel")] + #[test] + fn integrate_box_par_matches_sequential() { + use crate::cubature::TensorProductRule; + use crate::GaussLegendre; + + let gl = GaussLegendre::new(10).unwrap(); + let tp = TensorProductRule::isotropic(gl.rule(), 2).unwrap(); + let f = |x: &[f64]| x[0] * x[1]; + let seq = tp.rule().integrate_box(&[0.0, 0.0], &[1.0, 1.0], &f); + let par = tp.rule().integrate_box_par(&[0.0, 0.0], &[1.0, 1.0], &f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } } diff --git a/src/cubature/monte_carlo.rs b/src/cubature/monte_carlo.rs index d7f6962..adca729 100644 --- a/src/cubature/monte_carlo.rs +++ b/src/cubature/monte_carlo.rs @@ -8,6 +8,11 @@ use crate::cubature::sobol::SobolSequence; use crate::error::QuadratureError; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Monte Carlo integration method. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum MCMethod { @@ -243,6 +248,222 @@ impl MonteCarloIntegrator { converged: true, }) } + + /// Parallel Monte Carlo integration over the hyperrectangle \[lower, upper\]. + /// + /// For Sobol and Halton methods, all sample points are pre-generated + /// sequentially (the sequences are inherently serial), then function + /// evaluations are parallelised. For Plain MC, the sample space is split + /// into independent chunks with separate PRNGs, giving statistically + /// equivalent but numerically different results compared to sequential. 
+ #[cfg(feature = "parallel")] + pub fn integrate_par( + &self, + lower: &[f64], + upper: &[f64], + f: G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + let d = lower.len(); + if d == 0 || upper.len() != d { + return Err(QuadratureError::InvalidInput( + "lower and upper must have equal nonzero length", + )); + } + if self.num_samples == 0 { + return Err(QuadratureError::InvalidInput( + "number of samples must be >= 1", + )); + } + + let widths: Vec = (0..d).map(|j| upper[j] - lower[j]).collect(); + let volume: f64 = widths.iter().product(); + let n = self.num_samples; + + match self.method { + MCMethod::Plain => self.integrate_plain_par(d, lower, &widths, volume, n, &f), + MCMethod::Sobol => self.integrate_qmc_par_sobol(d, lower, &widths, volume, n, &f), + MCMethod::Halton => self.integrate_qmc_par_halton(d, lower, &widths, volume, n, &f), + } + } + + #[cfg(feature = "parallel")] + fn integrate_plain_par( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + let num_chunks = rayon::current_num_threads().max(1); + let chunk_size = n / num_chunks; + let remainder = n % num_chunks; + + // Each chunk uses an independent PRNG seeded from self.seed + chunk_id + let chunk_results: Vec<(f64, f64, usize)> = (0..num_chunks) + .into_par_iter() + .map(|chunk_id| { + let my_n = chunk_size + if chunk_id < remainder { 1 } else { 0 }; + if my_n == 0 { + return (0.0, 0.0, 0); + } + + let mut rng = Xoshiro256pp::new(self.seed.wrapping_add(chunk_id as u64)); + let mut x = vec![0.0; d]; + let mut mean = 0.0; + let mut m2 = 0.0; + + for i in 1..=my_n { + for j in 0..d { + x[j] = lower[j] + widths[j] * rng.next_f64(); + } + let val = f(&x); + let delta = val - mean; + mean += delta / i as f64; + let delta2 = val - mean; + m2 += delta * delta2; + } + + (mean * my_n as f64, m2, my_n) + }) + .collect(); + + // Merge 
chunk results + let mut total_sum = 0.0; + let mut total_m2 = 0.0; + let mut total_n = 0usize; + for (sum, m2, count) in chunk_results { + total_sum += sum; + total_m2 += m2; + total_n += count; + } + + let mean = total_sum / total_n as f64; + let variance = if total_n > 1 { + total_m2 / (total_n - 1) as f64 + } else { + 0.0 + }; + let std_error = (variance / total_n as f64).sqrt() * volume; + + Ok(QuadratureResult { + value: volume * mean, + error_estimate: std_error, + num_evals: total_n, + converged: true, + }) + } + + #[cfg(feature = "parallel")] + fn integrate_qmc_par_sobol( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + // Pre-generate all Sobol points (sequential — gray-code is inherently serial) + let mut sob = SobolSequence::new(d)?; + let mut points = vec![0.0; n * d]; + let mut u = vec![0.0; d]; + for i in 0..n { + sob.next_point(&mut u); + for j in 0..d { + points[i * d + j] = lower[j] + widths[j] * u[j]; + } + } + + // Parallel evaluation + let sum: f64 = (0..n) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + + let estimate = volume * sum / n as f64; + + // Heuristic error: compare N/2 and N estimates + let half = n / 2; + let half_sum: f64 = (0..half) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + let half_estimate = volume * half_sum / half as f64; + let error = (estimate - half_estimate).abs(); + + Ok(QuadratureResult { + value: estimate, + error_estimate: error, + num_evals: n + half, + converged: true, + }) + } + + #[cfg(feature = "parallel")] + fn integrate_qmc_par_halton( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + // Pre-generate all Halton points + let mut hal = HaltonSequence::new(d)?; + let mut 
points = vec![0.0; n * d]; + let mut u = vec![0.0; d]; + for i in 0..n { + hal.next_point(&mut u); + for j in 0..d { + points[i * d + j] = lower[j] + widths[j] * u[j]; + } + } + + // Parallel evaluation + let sum: f64 = (0..n) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + + let estimate = volume * sum / n as f64; + + // Heuristic error + let half = n / 2; + let half_sum: f64 = (0..half) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + let half_estimate = volume * half_sum / half as f64; + let error = (estimate - half_estimate).abs(); + + Ok(QuadratureResult { + value: estimate, + error_estimate: error, + num_evals: n + half, + converged: true, + }) + } } /// Convenience: quasi-Monte Carlo integration using Sobol sequences. @@ -379,4 +600,55 @@ mod tests { // integral of (x+y) over [0,1]^2 = 1 assert!((result.value - 1.0).abs() < 0.05, "value={}", result.value); } + + /// Parallel Sobol QMC produces identical results (deterministic sequence). + #[cfg(feature = "parallel")] + #[test] + fn sobol_par_matches_sequential() { + let f = |x: &[f64]| (x[0] + x[1] + x[2]).exp(); + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Sobol) + .with_samples(5000); + let seq = integrator.integrate(&[0.0; 3], &[1.0; 3], &f).unwrap(); + let par = integrator.integrate_par(&[0.0; 3], &[1.0; 3], &f).unwrap(); + assert!( + (seq.value - par.value).abs() < 1e-12, + "seq={}, par={}", + seq.value, + par.value + ); + } + + /// Parallel Halton QMC produces identical results (deterministic sequence). 
+ #[cfg(feature = "parallel")] + #[test] + fn halton_par_matches_sequential() { + let f = |x: &[f64]| x[0] * x[1]; + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Halton) + .with_samples(5000); + let seq = integrator.integrate(&[0.0, 0.0], &[1.0, 1.0], &f).unwrap(); + let par = integrator + .integrate_par(&[0.0, 0.0], &[1.0, 1.0], &f) + .unwrap(); + assert!( + (seq.value - par.value).abs() < 1e-12, + "seq={}, par={}", + seq.value, + par.value + ); + } + + /// Parallel plain MC produces reasonable results (different PRNG split, so not identical). + #[cfg(feature = "parallel")] + #[test] + fn plain_mc_par_reasonable() { + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Plain) + .with_samples(10000); + let result = integrator + .integrate_par(&[0.0, 0.0], &[1.0, 1.0], |x| x[0] * x[1]) + .unwrap(); + assert!((result.value - 0.25).abs() < 0.05, "value={}", result.value); + } } diff --git a/src/cubature/sobol.rs b/src/cubature/sobol.rs index 69afc2f..3a2c551 100644 --- a/src/cubature/sobol.rs +++ b/src/cubature/sobol.rs @@ -7,6 +7,9 @@ use crate::error::QuadratureError; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// Number of bits used for Sobol sequence generation. const BITS: u32 = 32; diff --git a/src/cubature/sparse_grid.rs b/src/cubature/sparse_grid.rs index e67b12d..a168df2 100644 --- a/src/cubature/sparse_grid.rs +++ b/src/cubature/sparse_grid.rs @@ -7,7 +7,15 @@ //! //! Uses Clenshaw-Curtis rules as the nested 1D basis (the canonical choice). -use std::collections::HashMap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + +#[cfg(not(feature = "std"))] +use alloc::collections::BTreeMap; +#[cfg(feature = "std")] +use std::collections::BTreeMap; use crate::cubature::CubatureRule; use crate::error::QuadratureError; @@ -59,21 +67,25 @@ impl SparseGrid { } /// Returns a reference to the underlying cubature rule. 
+ #[inline] pub fn rule(&self) -> &CubatureRule { &self.rule } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.rule.num_points() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.rule.dim() } /// Smolyak level. + #[inline] pub fn level(&self) -> usize { self.level } @@ -101,7 +113,7 @@ fn cc_rule(n: usize) -> (Vec, Vec) { let nm1 = n - 1; let nm1_f = nm1 as f64; - let pi = std::f64::consts::PI; + let pi = core::f64::consts::PI; // Nodes: cos(k*pi/(n-1)) for k=0..n-1, reversed to ascending order let nodes: Vec = (0..n) @@ -152,9 +164,9 @@ fn build_smolyak(dim: usize, level: usize) -> CubatureRule { let cc_rules: Vec<(Vec, Vec)> = (0..=max_level).map(|l| cc_rule(cc_order(l))).collect(); - // Accumulate points via HashMap for exact merging + // Accumulate points via BTreeMap for exact merging // Key: quantised d-dimensional point - let mut point_map: HashMap, (Vec, f64)> = HashMap::new(); + let mut point_map: BTreeMap, (Vec, f64)> = BTreeMap::new(); // Enumerate all multi-indices l = (l_0, ..., l_{d-1}) with each l_j >= 0 // and |l| in [max(level, dim) - dim, level] @@ -237,8 +249,11 @@ fn build_smolyak(dim: usize, level: usize) -> CubatureRule { pairs.sort_by(|a, b| { a.0.iter() .zip(b.0.iter()) - .find_map(|(x, y)| x.partial_cmp(y).filter(|o| *o != std::cmp::Ordering::Equal)) - .unwrap_or(std::cmp::Ordering::Equal) + .find_map(|(x, y)| { + x.partial_cmp(y) + .filter(|o| *o != core::cmp::Ordering::Equal) + }) + .unwrap_or(core::cmp::Ordering::Equal) }); let n = pairs.len(); @@ -395,7 +410,7 @@ mod tests { (x[0] + x[1] + x[2]).exp() }); // Exact: (e-1)^3 ≈ 5.07321... 
- let e_minus_1 = std::f64::consts::E - 1.0; + let e_minus_1 = core::f64::consts::E - 1.0; let expected = e_minus_1 * e_minus_1 * e_minus_1; assert!( (result - expected).abs() < 1e-6, diff --git a/src/cubature/tensor.rs b/src/cubature/tensor.rs index a8aae15..ceceb39 100644 --- a/src/cubature/tensor.rs +++ b/src/cubature/tensor.rs @@ -7,6 +7,9 @@ use crate::cubature::CubatureRule; use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// A tensor-product cubature rule formed from 1D quadrature rules. /// /// # Example @@ -80,16 +83,19 @@ impl TensorProductRule { } /// Returns a reference to the underlying cubature rule. + #[inline] pub fn rule(&self) -> &CubatureRule { &self.rule } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.rule.num_points() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.rule.dim() } diff --git a/src/error.rs b/src/error.rs index f2528f4..69fe2c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -27,4 +27,5 @@ impl fmt::Display for QuadratureError { } } +#[cfg(feature = "std")] impl std::error::Error for QuadratureError {} diff --git a/src/gauss_chebyshev.rs b/src/gauss_chebyshev.rs index 4c23baa..3f5b2ad 100644 --- a/src/gauss_chebyshev.rs +++ b/src/gauss_chebyshev.rs @@ -10,11 +10,16 @@ //! - Nodes: `x_k = cos(k π / (n + 1))`, k = 1, ..., n //! - Weights: `w_k = π / (n + 1) * sin²(k π / (n + 1))` -use std::f64::consts::PI; +use core::f64::consts::PI; use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Gauss-Chebyshev Type I quadrature rule. /// /// Weight function `w(x) = 1 / sqrt(1 - x²)` on \[-1, 1\]. 
@@ -30,7 +35,7 @@ use crate::rule::QuadratureRule; /// assert_eq!(gc.order(), 10); /// // All weights equal pi/n for Type I /// for &w in gc.weights() { -/// assert!((w - std::f64::consts::PI / 10.0).abs() < 1e-14); +/// assert!((w - core::f64::consts::PI / 10.0).abs() < 1e-14); /// } /// ``` #[derive(Debug, Clone)] @@ -65,21 +70,25 @@ impl GaussChebyshevFirstKind { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -130,21 +139,25 @@ impl GaussChebyshevSecondKind { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } diff --git a/src/gauss_hermite.rs b/src/gauss_hermite.rs index 60eba34..62eceff 100644 --- a/src/gauss_hermite.rs +++ b/src/gauss_hermite.rs @@ -9,6 +9,11 @@ use crate::error::QuadratureError; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Hermite quadrature rule. /// /// Integrates `f(x) * e^(-x²)` over (-∞, ∞) using n points. 
@@ -21,7 +26,7 @@ use crate::rule::QuadratureRule; /// // Integral of e^(-x^2) over (-inf, inf) = sqrt(pi) /// let gh = GaussHermite::new(10).unwrap(); /// let result: f64 = gh.nodes().iter().zip(gh.weights()).map(|(_, &w)| w).sum(); -/// assert!((result - std::f64::consts::PI.sqrt()).abs() < 1e-12); +/// assert!((result - core::f64::consts::PI.sqrt()).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct GaussHermite { @@ -42,21 +47,25 @@ impl GaussHermite { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on (-∞, ∞). + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -72,7 +81,7 @@ impl GaussHermite { fn compute_hermite(n: usize) -> (Vec, Vec) { let diag = vec![0.0; n]; let off_diag_sq: Vec = (1..n).map(|k| k as f64 / 2.0).collect(); - let mu0 = std::f64::consts::PI.sqrt(); + let mu0 = core::f64::consts::PI.sqrt(); golub_welsch(&diag, &off_diag_sq, mu0) } @@ -80,7 +89,7 @@ fn compute_hermite(n: usize) -> (Vec, Vec) { #[cfg(test)] mod tests { use super::*; - use std::f64::consts::PI; + use core::f64::consts::PI; #[test] fn zero_order() { diff --git a/src/gauss_jacobi.rs b/src/gauss_jacobi.rs index c2534e1..aa3d33c 100644 --- a/src/gauss_jacobi.rs +++ b/src/gauss_jacobi.rs @@ -16,6 +16,11 @@ use crate::error::QuadratureError; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Jacobi quadrature rule. 
/// /// # Example @@ -30,7 +35,7 @@ use crate::rule::QuadratureRule; /// // Weight sum = integral of (1-x)^alpha * (1+x)^beta over [-1,1] /// // For alpha=beta=0.5: B(1.5, 1.5) * 2^2 = pi/2 /// let sum: f64 = gj.weights().iter().sum(); -/// assert!((sum - std::f64::consts::PI / 2.0).abs() < 1e-12); +/// assert!((sum - core::f64::consts::PI / 2.0).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct GaussJacobi { @@ -72,21 +77,25 @@ impl GaussJacobi { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -144,7 +153,7 @@ fn compute_jacobi(n: usize, alpha: f64, beta: f64) -> (Vec, Vec) { .collect(); // μ₀ = 2^(ab+1) Γ(α+1)Γ(β+1) / Γ(ab+2) - let mu0 = ((ab + 1.0) * std::f64::consts::LN_2 + ln_gamma(alpha + 1.0) + ln_gamma(beta + 1.0) + let mu0 = ((ab + 1.0) * core::f64::consts::LN_2 + ln_gamma(alpha + 1.0) + ln_gamma(beta + 1.0) - ln_gamma(ab + 2.0)) .exp(); @@ -173,7 +182,7 @@ pub(crate) fn ln_gamma(x: f64) -> f64 { if x < 0.5 { // Reflection formula let z = 1.0 - x; - std::f64::consts::PI.ln() - (std::f64::consts::PI * x).sin().ln() - ln_gamma(z) + core::f64::consts::PI.ln() - (core::f64::consts::PI * x).sin().ln() - ln_gamma(z) } else { let z = x - 1.0; let mut sum = COEFF[0]; @@ -181,14 +190,14 @@ pub(crate) fn ln_gamma(x: f64) -> f64 { sum += c / (z + i as f64); } let t = z + 7.5; // g + 0.5 - 0.5 * (2.0 * std::f64::consts::PI).ln() + (t.ln() * (z + 0.5)) - t + sum.ln() + 0.5 * (2.0 * core::f64::consts::PI).ln() + (t.ln() * (z + 0.5)) - t + sum.ln() } } #[cfg(test)] mod tests { use super::*; - use std::f64::consts::PI; + use core::f64::consts::PI; #[test] fn zero_order() { @@ -231,7 +240,7 
@@ mod tests { /// Helper: integral of (1-x)^a (1+x)^b over [-1,1]. fn jacobi_integral(a: f64, b: f64) -> f64 { let log_val = - (a + b + 1.0) * std::f64::consts::LN_2 + ln_gamma(a + 1.0) + ln_gamma(b + 1.0) + (a + b + 1.0) * core::f64::consts::LN_2 + ln_gamma(a + 1.0) + ln_gamma(b + 1.0) - ln_gamma(a + b + 2.0); log_val.exp() } diff --git a/src/gauss_kronrod.rs b/src/gauss_kronrod.rs index 925e169..181a0f6 100644 --- a/src/gauss_kronrod.rs +++ b/src/gauss_kronrod.rs @@ -14,6 +14,9 @@ //! compile time. #![allow(clippy::excessive_precision)] +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Detailed result from a single Gauss-Kronrod panel evaluation. /// /// Used internally by the adaptive integrator for error analysis. @@ -42,6 +45,7 @@ pub enum GKPair { impl GKPair { /// Number of Kronrod nodes. + #[inline] pub fn kronrod_order(self) -> usize { match self { Self::G7K15 => 15, @@ -53,6 +57,7 @@ impl GKPair { } /// Number of Gauss nodes embedded in the Kronrod rule. + #[inline] pub fn gauss_order(self) -> usize { match self { Self::G7K15 => 7, @@ -78,7 +83,7 @@ impl GKPair { /// /// // Integrate sin(x) over [0, pi] with error estimate /// let (estimate, error) = gk.integrate( -/// 0.0, std::f64::consts::PI, |x: f64| x.sin() +/// 0.0, core::f64::consts::PI, |x: f64| x.sin() /// ); /// assert!((estimate - 2.0).abs() < 1e-14); /// assert!(error < 1e-10); @@ -110,6 +115,7 @@ impl GaussKronrod { } /// Returns the pair type. 
+ #[inline] pub fn pair(&self) -> GKPair { self.pair } @@ -596,7 +602,7 @@ mod tests { fn integrate_sin() { for pair in [GKPair::G7K15, GKPair::G10K21, GKPair::G15K31] { let gk = GaussKronrod::new(pair); - let (est, err) = gk.integrate(0.0, std::f64::consts::PI, f64::sin); + let (est, err) = gk.integrate(0.0, core::f64::consts::PI, f64::sin); assert!((est - 2.0).abs() < 1e-12, "{pair:?}: estimate = {est}"); assert!(err < 1e-6, "{pair:?}: error estimate = {err}"); } diff --git a/src/gauss_laguerre.rs b/src/gauss_laguerre.rs index 49513c7..df81eb0 100644 --- a/src/gauss_laguerre.rs +++ b/src/gauss_laguerre.rs @@ -12,6 +12,11 @@ use crate::gauss_jacobi::ln_gamma; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Laguerre quadrature rule. /// /// Integrates `f(x) * x^α * e^(-x)` over \[0, ∞) using n points. @@ -59,21 +64,25 @@ impl GaussLaguerre { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[0, ∞). + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } diff --git a/src/gauss_legendre.rs b/src/gauss_legendre.rs index 3c2510c..15799da 100644 --- a/src/gauss_legendre.rs +++ b/src/gauss_legendre.rs @@ -13,6 +13,11 @@ use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Legendre quadrature rule. /// /// Exact for polynomials of degree 2n - 1, where n is the number of points. @@ -48,24 +53,45 @@ impl GaussLegendre { } /// Returns a reference to the underlying quadrature rule. 
+ #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } + + /// Create a new n-point Gauss-Legendre rule with parallel node generation. + /// + /// Only beneficial for large n (> ~1000) where the Bogaert asymptotic + /// path is used. For small n, the overhead of thread scheduling exceeds + /// the computation time. + #[cfg(feature = "parallel")] + pub fn new_par(n: usize) -> Result { + if n == 0 { + return Err(QuadratureError::ZeroOrder); + } + + let (nodes, weights) = compute_gl_pair_par(n); + Ok(Self { + rule: QuadratureRule { nodes, weights }, + }) + } } // --------------------------------------------------------------------------- @@ -107,7 +133,7 @@ fn compute_newton(n: usize, m: usize, nodes: &mut [f64], weights: &mut [f64]) { for i in 0..m { // Tricomi initial guess (1-indexed: k = i + 1) let k = (i + 1) as f64; - let theta = std::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); + let theta = core::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); let mut x = theta.cos(); // Newton iteration on P_n(x) using the three-term recurrence @@ -199,7 +225,7 @@ fn bessel_j0_zero(k: usize) -> f64 { BESSEL_J0_ZEROS[k - 1] } else { // McMahon's asymptotic expansion for large zeros of J_0 - let z = std::f64::consts::PI * (k as f64 - 0.25); + let z = core::f64::consts::PI * (k as f64 - 0.25); let r = 1.0 / z; let r2 = r * r; z + r @@ -349,6 +375,74 @@ fn bogaert_pair(n: usize, k: usize) -> (f64, f64) { (theta_refined, weight) } +/// Compute all n nodes and weights in parallel. +/// +/// Uses the same Newton/Bogaert split as the sequential version, but +/// parallelises the per-node computation using rayon. 
+#[cfg(feature = "parallel")] +fn compute_gl_pair_par(n: usize) -> (Vec, Vec) { + use rayon::prelude::*; + + let m = n.div_ceil(2); + + if n <= ASYMPTOTIC_THRESHOLD { + // Newton path: each node is independent after computing the initial guess + let pairs: Vec<(usize, f64, f64)> = (0..m) + .into_par_iter() + .map(|i| { + let k = (i + 1) as f64; + let nf = n as f64; + let theta = core::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); + let mut x = theta.cos(); + + for _ in 0..100 { + let (p_n, p_n_deriv) = legendre_eval(n, x); + let dx = -p_n / p_n_deriv; + x += dx; + if dx.abs() < 2.0 * f64::EPSILON * x.abs().max(1.0) { + break; + } + } + + let (_, p_n_deriv) = legendre_eval(n, x); + let w = 2.0 / ((1.0 - x * x) * p_n_deriv * p_n_deriv); + (i, x, w) + }) + .collect(); + + let mut nodes = vec![0.0_f64; n]; + let mut weights = vec![0.0_f64; n]; + for (i, x, w) in pairs { + nodes[n - 1 - i] = x; + weights[n - 1 - i] = w; + nodes[i] = -x; + weights[i] = w; + } + (nodes, weights) + } else { + // Bogaert path: embarrassingly parallel + let pairs: Vec<(usize, f64, f64)> = (0..m) + .into_par_iter() + .map(|i| { + let (theta, weight) = bogaert_pair(n, i + 1); + (i, theta.cos(), weight) + }) + .collect(); + + let mut nodes = vec![0.0_f64; n]; + let mut weights = vec![0.0_f64; n]; + for (i, x, w) in pairs { + nodes[n - 1 - i] = x; + weights[n - 1 - i] = w; + if i != n - 1 - i { + nodes[i] = -x; + weights[i] = w; + } + } + (nodes, weights) + } +} + /// Compute GL nodes/weights via Bogaert's asymptotic expansion. 
fn compute_bogaert(n: usize, m: usize, nodes: &mut [f64], weights: &mut [f64]) { for i in 0..m { @@ -504,7 +598,7 @@ mod tests { #[test] fn integrate_sin_on_zero_to_pi() { let gl = GaussLegendre::new(20).unwrap(); - let result = gl.rule().integrate(0.0, std::f64::consts::PI, f64::sin); + let result = gl.rule().integrate(0.0, core::f64::consts::PI, f64::sin); assert!((result - 2.0).abs() < 1e-13, "got {result}, expected 2.0"); } @@ -552,4 +646,38 @@ mod tests { ); } } + + /// Parallel node generation matches sequential for both Newton and Bogaert paths. + #[cfg(feature = "parallel")] + #[test] + fn new_par_matches_sequential() { + for n in [5, 50, 100, 200, 1000] { + let seq = GaussLegendre::new(n).unwrap(); + let par = GaussLegendre::new_par(n).unwrap(); + for i in 0..n { + let node_err = (seq.nodes()[i] - par.nodes()[i]).abs(); + let weight_err = (seq.weights()[i] - par.weights()[i]).abs(); + assert!(node_err < 1e-15, "n={n}, i={i}: node diff = {node_err}"); + assert!( + weight_err < 1e-15, + "n={n}, i={i}: weight diff = {weight_err}" + ); + } + } + } + + /// Parallel composite integration matches sequential. + #[cfg(feature = "parallel")] + #[test] + fn composite_par_matches_sequential() { + let gl = GaussLegendre::new(5).unwrap(); + let f = |x: f64| x.sin(); + let seq = gl + .rule() + .integrate_composite(0.0, core::f64::consts::PI, 100, f); + let par = gl + .rule() + .integrate_composite_par(0.0, core::f64::consts::PI, 100, f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } } diff --git a/src/gauss_lobatto.rs b/src/gauss_lobatto.rs index 0c846d8..c11a929 100644 --- a/src/gauss_lobatto.rs +++ b/src/gauss_lobatto.rs @@ -10,6 +10,11 @@ use crate::error::QuadratureError; use crate::gauss_legendre::legendre_eval; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Lobatto quadrature rule on \[-1, 1\]. 
 ///
 /// # Example
@@ -49,21 +54,25 @@
     }
 
     /// Returns a reference to the underlying quadrature rule.
+    #[inline]
     pub fn rule(&self) -> &QuadratureRule {
         &self.rule
     }
 
     /// Returns the number of quadrature points.
+    #[inline]
     pub fn order(&self) -> usize {
         self.rule.order()
     }
 
     /// Returns the nodes on \[-1, 1\].
+    #[inline]
     pub fn nodes(&self) -> &[f64] {
         &self.rule.nodes
     }
 
     /// Returns the weights.
+    #[inline]
     pub fn weights(&self) -> &[f64] {
         &self.rule.weights
     }
@@ -95,7 +104,7 @@ fn compute_lobatto(n: usize) -> (Vec<f64>, Vec<f64>) {
     let m = n - 2; // number of interior nodes
     for k in 0..m {
         // Initial guess: Chebyshev-type
-        let theta = std::f64::consts::PI * (k as f64 + 1.0) / (m as f64 + 1.0);
+        let theta = core::f64::consts::PI * (k as f64 + 1.0) / (m as f64 + 1.0);
         let mut x = -(theta.cos());
 
         // Newton on P'_{n-1}(x) = 0.
diff --git a/src/gauss_radau.rs b/src/gauss_radau.rs
index 5fb8730..b22c2e1 100644
--- a/src/gauss_radau.rs
+++ b/src/gauss_radau.rs
@@ -13,6 +13,9 @@
 use crate::error::QuadratureError;
 use crate::golub_welsch::{golub_welsch, radau_modify};
 use crate::rule::QuadratureRule;
 
+#[cfg(not(feature = "std"))]
+use alloc::{vec, vec::Vec};
+
 /// A Gauss-Radau quadrature rule on \[-1, 1\].
 ///
 /// # Example
@@ -64,21 +67,25 @@
     }
 
     /// Returns a reference to the underlying quadrature rule.
+    #[inline]
     pub fn rule(&self) -> &QuadratureRule {
         &self.rule
     }
 
     /// Returns the number of quadrature points.
+    #[inline]
     pub fn order(&self) -> usize {
         self.rule.order()
     }
 
     /// Returns the nodes on \[-1, 1\].
+    #[inline]
     pub fn nodes(&self) -> &[f64] {
         &self.rule.nodes
     }
 
     /// Returns the weights.
+    #[inline]
     pub fn weights(&self) -> &[f64] {
         &self.rule.weights
     }
diff --git a/src/golub_welsch.rs b/src/golub_welsch.rs
index 2d63e47..d11f0b9 100644
--- a/src/golub_welsch.rs
+++ b/src/golub_welsch.rs
@@ -9,6 +9,11 @@
 //! Reference: Golub & Welsch (1969), "Calculation of Gauss Quadrature Rules",
 //! Mathematics of Computation 23(106), 221-230.
+#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Compute quadrature nodes and weights from three-term recurrence coefficients. /// /// # Arguments diff --git a/src/lib.rs b/src/lib.rs index a5b6ce6..c2948d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg_attr(not(feature = "std"), no_std)] //! # bilby //! //! A high-performance numerical quadrature (integration) library for Rust. @@ -12,6 +13,14 @@ //! - **Precomputed rules** — generate nodes and weights once, integrate many times //! - **No heap allocation on hot paths** — rule construction allocates, integration does not //! - **Separate 1D and N-D APIs** — `Fn(F) -> F` for 1D, `Fn(&[F]) -> F` for N-D +//! - **`no_std` compatible** — works without the standard library (with `alloc`) +//! +//! ## Feature Flags +//! +//! | Feature | Default | Description | +//! |---------|---------|-------------| +//! | `std` | Yes | Enables `std::error::Error` impl and [`cache`] module | +//! | `parallel` | No | Enables rayon-based `_par` methods (implies `std`) | //! //! ## Quick Start //! @@ -32,7 +41,7 @@ //! use bilby::{GaussKronrod, GKPair}; //! //! let gk = GaussKronrod::new(GKPair::G7K15); -//! let (estimate, error) = gk.integrate(0.0, std::f64::consts::PI, f64::sin); +//! let (estimate, error) = gk.integrate(0.0, core::f64::consts::PI, f64::sin); //! assert!((estimate - 2.0).abs() < 1e-14); //! ``` //! @@ -41,7 +50,7 @@ //! ``` //! use bilby::adaptive_integrate; //! -//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-12).unwrap(); +//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, core::f64::consts::PI, 1e-12).unwrap(); //! assert!((result.value - 2.0).abs() < 1e-12); //! assert!(result.is_converged()); //! ``` @@ -53,10 +62,44 @@ //! //! // Integral of exp(-x^2) over (-inf, inf) = sqrt(pi) //! let result = integrate_infinite(|x: f64| (-x * x).exp(), 1e-10).unwrap(); -//! 
assert!((result.value - std::f64::consts::PI.sqrt()).abs() < 1e-8); +//! assert!((result.value - core::f64::consts::PI.sqrt()).abs() < 1e-8); +//! ``` +//! +//! ## Multi-Dimensional Integration +//! +//! ``` +//! use bilby::cubature::{TensorProductRule, adaptive_cubature}; +//! use bilby::GaussLegendre; +//! +//! // Tensor product: 10-point GL in each of 2 dimensions +//! let gl = GaussLegendre::new(10).unwrap(); +//! let tp = TensorProductRule::isotropic(gl.rule(), 2).unwrap(); +//! let result = tp.rule().integrate_box( +//! &[0.0, 0.0], &[1.0, 1.0], +//! |x| x[0] * x[1], +//! ); +//! assert!((result - 0.25).abs() < 1e-14); //! ``` +//! +//! ## Precomputed Rule Cache +//! +//! Available when the `std` feature is enabled (default): +//! +//! ``` +//! # #[cfg(feature = "std")] { +//! use bilby::cache::GL10; +//! +//! let result = GL10.rule().integrate(0.0, 1.0, |x: f64| x * x); +//! assert!((result - 1.0 / 3.0).abs() < 1e-14); +//! # } +//! ``` + +#[cfg(not(feature = "std"))] +extern crate alloc; pub mod adaptive; +#[cfg(feature = "std")] +pub mod cache; pub mod cauchy_pv; pub mod clenshaw_curtis; pub mod cubature; diff --git a/src/oscillatory.rs b/src/oscillatory.rs index 720dc97..bcb8617 100644 --- a/src/oscillatory.rs +++ b/src/oscillatory.rs @@ -22,6 +22,11 @@ use crate::error::QuadratureError; use crate::gauss_legendre::GaussLegendre; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Type of oscillatory kernel. 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum OscillatoryKernel {
@@ -120,7 +125,7 @@
         // Step 1: Evaluate f at Clenshaw-Curtis nodes on [-1, 1]
         let f_vals: Vec<f64> = (0..=n)
             .map(|k| {
-                let t = (k as f64 * std::f64::consts::PI / n as f64).cos();
+                let t = (k as f64 * core::f64::consts::PI / n as f64).cos();
                 f(mid + half * t)
             })
             .collect();
@@ -220,7 +225,7 @@
 /// c_n are halved).
 fn chebyshev_coefficients(f_vals: &[f64], n: usize) -> Vec<f64> {
     let mut coeffs = vec![0.0; n + 1];
-    let pi_n = std::f64::consts::PI / n as f64;
+    let pi_n = core::f64::consts::PI / n as f64;
 
     for (j, cj) in coeffs.iter_mut().enumerate() {
         let mut sum = 0.0;
@@ -342,7 +347,7 @@
     fn sin_trivial() {
         // ∫₀^π sin(x) dx = 2 (ω=1, f=1)
         let result =
-            integrate_oscillatory_sin(|_| 1.0, 0.0, std::f64::consts::PI, 1.0, 1e-10).unwrap();
+            integrate_oscillatory_sin(|_| 1.0, 0.0, core::f64::consts::PI, 1.0, 1e-10).unwrap();
         assert!((result.value - 2.0).abs() < 1e-6, "value={}", result.value);
     }
@@ -374,7 +379,7 @@
     fn cos_with_linear_f() {
         // ∫₀^π x cos(x) dx = [x sin(x) + cos(x)]₀^π = -1 - 1 = -2
         let result =
-            integrate_oscillatory_cos(|x| x, 0.0, std::f64::consts::PI, 1.0, 1e-8).unwrap();
+            integrate_oscillatory_cos(|x| x, 0.0, core::f64::consts::PI, 1.0, 1e-8).unwrap();
         assert!(
             (result.value - (-2.0)).abs() < 1e-4,
             "value={}",
@@ -389,7 +394,7 @@
         let r = 1.0;
         let i = omega;
         // e^(1+iω) = e * (cos(ω) + i sin(ω))
-        let e = std::f64::consts::E;
+        let e = core::f64::consts::E;
         let re_num = e * omega.cos() - 1.0;
         let im_num = e * omega.sin();
         let denom = r * r + i * i; // 1 + ω²
diff --git a/src/result.rs b/src/result.rs
index b03fc19..db8547c 100644
--- a/src/result.rs
+++ b/src/result.rs
@@ -21,6 +21,7 @@ pub struct QuadratureResult<F: Float> {
 impl<F: Float> QuadratureResult<F> {
     /// Returns `true` if the integration converged within tolerance.
+    #[inline]
     pub fn is_converged(&self) -> bool {
         self.converged
     }
diff --git a/src/rule.rs b/src/rule.rs
index e64d876..fe873d3 100644
--- a/src/rule.rs
+++ b/src/rule.rs
@@ -2,6 +2,9 @@
 
 use num_traits::Float;
 
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+
 /// A precomputed quadrature rule on the reference interval \[-1, 1\].
 ///
 /// Stores nodes and weights that can be reused across many integrations.
@@ -16,6 +19,7 @@ pub struct QuadratureRule {
 impl QuadratureRule {
     /// Returns the number of points in this rule.
+    #[inline]
     pub fn order(&self) -> usize {
         self.nodes.len()
     }
@@ -26,6 +30,7 @@
     /// x = (b - a) / 2 * t + (a + b) / 2
     ///
     /// The result is scaled by (b - a) / 2.
+    #[inline]
     pub fn integrate<F: Float, G>(&self, a: F, b: F, f: G) -> F
     where
         G: Fn(F) -> F,
@@ -61,4 +66,29 @@
         }
         total
     }
+
+    /// Parallel composite integration over \[a, b\] with `n_panels` subintervals.
+    ///
+    /// Identical to [`integrate_composite`](Self::integrate_composite) but evaluates
+    /// panels in parallel using rayon. Requires `F: Send + Sync` and `f: Fn(F) -> F + Sync`.
+    #[cfg(feature = "parallel")]
+    pub fn integrate_composite_par<F: Float, G>(&self, a: F, b: F, n_panels: usize, f: G) -> F
+    where
+        F: Send + Sync,
+        G: Fn(F) -> F + Sync,
+    {
+        use rayon::prelude::*;
+
+        let n = F::from(n_panels).unwrap();
+        let panel_width = (b - a) / n;
+
+        (0..n_panels)
+            .into_par_iter()
+            .map(|i| {
+                let panel_a = a + F::from(i).unwrap() * panel_width;
+                let panel_b = panel_a + panel_width;
+                self.integrate(panel_a, panel_b, &f)
+            })
+            .reduce(F::zero, |a, b| a + b)
+    }
 }
diff --git a/src/tanh_sinh.rs b/src/tanh_sinh.rs
index 3962cbc..66433b7 100644
--- a/src/tanh_sinh.rs
+++ b/src/tanh_sinh.rs
@@ -20,6 +20,9 @@
 use crate::error::QuadratureError;
 use crate::result::QuadratureResult;
 
+#[cfg(not(feature = "std"))]
+use num_traits::Float as _;
+
 /// Builder for tanh-sinh (double-exponential) quadrature.
 ///
 /// # Example
@@ -93,7 +96,7 @@
         let mid = 0.5 * (a + b);
         let half = 0.5 * (b - a);
 
-        let pi_2 = std::f64::consts::FRAC_PI_2;
+        let pi_2 = core::f64::consts::FRAC_PI_2;
 
         let mut total_evals = 0usize;
         let mut prev_estimate: f64 = 0.0;
@@ -305,7 +308,7 @@
         // ∫₀¹ 1/√(x(1-x)) dx = π
         let result = tanh_sinh_integrate(|x| 1.0 / (x * (1.0 - x)).sqrt(), 0.0, 1.0, 1e-8).unwrap();
         assert!(
-            (result.value - std::f64::consts::PI).abs() < 1e-6,
+            (result.value - core::f64::consts::PI).abs() < 1e-6,
             "value={}",
             result.value
         );
@@ -316,7 +319,7 @@
         // ∫₋₁¹ 1/√(1-x²) dx = π
         let result = tanh_sinh_integrate(|x| 1.0 / (1.0 - x * x).sqrt(), -1.0, 1.0, 1e-8).unwrap();
         assert!(
-            (result.value - std::f64::consts::PI).abs() < 1e-6,
+            (result.value - core::f64::consts::PI).abs() < 1e-6,
             "value={}",
             result.value
         );
@@ -325,7 +328,7 @@
     #[test]
     fn sin_integral() {
         // ∫₀^π sin(x) dx = 2 (smooth, sanity check)
-        let result = tanh_sinh_integrate(|x| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap();
+        let result = tanh_sinh_integrate(|x| x.sin(), 0.0, core::f64::consts::PI, 1e-10).unwrap();
         assert!((result.value - 2.0).abs() < 1e-8, "value={}", result.value);
     }
diff --git a/src/transforms.rs b/src/transforms.rs
index d7ea596..c3f5f9b 100644
--- a/src/transforms.rs
+++ b/src/transforms.rs
@@ -148,7 +148,7 @@
 /// 
 /// // Integral of exp(-x^2) over (-inf, inf) = sqrt(pi)
 /// let r = integrate_infinite(|x: f64| (-x * x).exp(), 1e-10).unwrap();
-/// assert!((r.value - std::f64::consts::PI.sqrt()).abs() < 1e-8);
+/// assert!((r.value - core::f64::consts::PI.sqrt()).abs() < 1e-8);
 /// ```
 pub fn integrate_infinite<G>(f: G, tol: f64) -> Result<QuadratureResult<f64>, QuadratureError>
 where
@@ -177,7 +177,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use std::f64::consts::PI;
+    use core::f64::consts::PI;
 
     /// exp(-x) over [0, inf) = 1.
#[test] diff --git a/src/weighted.rs b/src/weighted.rs index cfe81b5..4cd1a9a 100644 --- a/src/weighted.rs +++ b/src/weighted.rs @@ -10,7 +10,7 @@ //! //! // ∫₋₁¹ 1/√(1-x²) dx = π (Chebyshev Type I weight) //! let result = weighted_integrate(|_| 1.0, WeightFunction::ChebyshevI, 20).unwrap(); -//! assert!((result - std::f64::consts::PI).abs() < 1e-12); +//! assert!((result - core::f64::consts::PI).abs() < 1e-12); //! ``` use crate::error::QuadratureError; @@ -19,6 +19,9 @@ use crate::gauss_hermite::GaussHermite; use crate::gauss_jacobi::GaussJacobi; use crate::gauss_laguerre::GaussLaguerre; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Known weight functions for product integration. #[derive(Debug, Clone)] pub enum WeightFunction { @@ -46,7 +49,7 @@ pub enum WeightFunction { /// let wi = WeightedIntegrator::new(WeightFunction::Hermite, 20).unwrap(); /// // ∫₋∞^∞ 1 · e^(-x²) dx = √π /// let result = wi.integrate(|_| 1.0); -/// assert!((result - std::f64::consts::PI.sqrt()).abs() < 1e-12); +/// assert!((result - core::f64::consts::PI.sqrt()).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct WeightedIntegrator { @@ -208,7 +211,7 @@ mod tests { ) .unwrap(); assert!( - (result - std::f64::consts::FRAC_PI_2).abs() < 1e-10, + (result - core::f64::consts::FRAC_PI_2).abs() < 1e-10, "result={result}" ); } @@ -234,7 +237,7 @@ mod tests { // ∫₋∞^∞ e^(-x²) dx = √π let result = weighted_integrate(|_| 1.0, WeightFunction::Hermite, 10).unwrap(); assert!( - (result - std::f64::consts::PI.sqrt()).abs() < 1e-10, + (result - core::f64::consts::PI.sqrt()).abs() < 1e-10, "result={result}" ); } @@ -244,7 +247,7 @@ mod tests { // ∫₋₁¹ 1/√(1-x²) dx = π let result = weighted_integrate(|_| 1.0, WeightFunction::ChebyshevI, 20).unwrap(); assert!( - (result - std::f64::consts::PI).abs() < 1e-12, + (result - core::f64::consts::PI).abs() < 1e-12, "result={result}" ); } @@ -254,7 +257,7 @@ mod tests { // ∫₋₁¹ √(1-x²) dx = π/2 let result = weighted_integrate(|_| 1.0, 
WeightFunction::ChebyshevII, 20).unwrap(); assert!( - (result - std::f64::consts::FRAC_PI_2).abs() < 1e-12, + (result - core::f64::consts::FRAC_PI_2).abs() < 1e-12, "result={result}" ); }