diff --git a/Cargo.toml b/Cargo.toml index 457fc81..de7a897 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,13 +14,31 @@ categories = ["mathematics", "science", "algorithms"] [lib] bench = false +[features] +default = ["std"] +std = [] +parallel = ["dep:rayon", "std"] + [dependencies] -num-traits = "0.2" +num-traits = { version = "0.2", default-features = false, features = ["libm"] } +rayon = { version = "1", optional = true } [dev-dependencies] criterion = { version = "0.8", features = ["html_reports"] } approx = "0.5" +[[bench]] +name = "node_generation" +harness = false + +[[bench]] +name = "integration_1d" +harness = false + +[[bench]] +name = "cubature" +harness = false + [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] diff --git a/README.md b/README.md index f7579de..4dcac00 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,12 @@ A high-performance numerical quadrature (integration) library for Rust. -- **Gauss-Legendre** -- O(1) per-node generation via the Bogaert (2014) algorithm -- **Gauss-Kronrod** -- embedded pairs (G7-K15 through G25-K51) for error estimation +- **Comprehensive** -- Gauss-Legendre, Gauss-Kronrod, Jacobi, Hermite, Laguerre, Chebyshev, Radau, Lobatto, Clenshaw-Curtis, tanh-sinh, oscillatory, Cauchy PV +- **Adaptive integration** -- QUADPACK-style error-driven refinement with configurable GK pairs +- **Multi-dimensional** -- tensor products, Smolyak sparse grids, Genz-Malik adaptive cubature, quasi-Monte Carlo (Sobol, Halton) - **Generic over `F: Float`** -- works with `f32`, `f64`, and AD types (e.g., [echidna](https://github.com/Entrolution/echidna)) +- **`no_std` compatible** -- works without the standard library (with `alloc`) +- **Optional parallelism** -- `parallel` feature for rayon-based `_par` methods - **Precomputed rules** -- generate nodes/weights once, integrate many times with zero allocation ## Quick Start @@ -22,37 +25,53 @@ Add to `Cargo.toml`: bilby = "0.1" ``` -```rust,ignore +```rust 
use bilby::GaussLegendre; // Create a 10-point Gauss-Legendre rule -let gl = GaussLegendre::new(10); +let gl = GaussLegendre::new(10).unwrap(); -// Integrate x^2 over [0, 1] -let result = gl.integrate(0.0, 1.0, |x| x * x); +// Integrate x^2 over [0, 1] (exact result = 1/3) +let result = gl.rule().integrate(0.0, 1.0, |x: f64| x * x); assert!((result - 1.0 / 3.0).abs() < 1e-14); ``` -## Roadmap +## Features -bilby is being developed in phases. See [ROADMAP.md](ROADMAP.md) for details. +| Feature | Default | Description | +|---------|---------|-------------| +| `std` | Yes | Enables `std::error::Error` impl and `cache` module | +| `parallel` | No | Enables rayon-based `_par` methods (implies `std`) | -| Phase | Version | Content | -|-------|---------|---------| -| 0 | v0.1.0 | Gauss-Legendre (Bogaert), Gauss-Kronrod pairs, fixed-order integration | -| 1 | v0.2.0 | Adaptive integration (QUADPACK-style), infinite domains, error control | -| 2 | v0.3.0 | Classical rule families (Jacobi, Laguerre, Hermite, Chebyshev, Radau, Lobatto, Clenshaw-Curtis) | -| 3 | v0.4.0 | Multi-dimensional (tensor product, sparse grids, adaptive cubature, quasi-Monte Carlo) | -| 4 | v0.5.0 | Specialty methods (oscillatory, tanh-sinh, Cauchy principal value) | -| 5 | v1.0.0 | Parallelism, caching, `no_std`, benchmarks | +### `no_std` support + +bilby works in `no_std` environments (with `alloc`). Disable default features: + +```toml +[dependencies] +bilby = { version = "0.1", default-features = false } +``` + +### Parallelism + +Enable the `parallel` feature for parallel variants of integration methods: + +```toml +[dependencies] +bilby = { version = "0.1", features = ["parallel"] } +``` + +This provides `integrate_composite_par`, `integrate_par`, `integrate_box_par`, +`MonteCarloIntegrator::integrate_par`, and `GaussLegendre::new_par`. 
## Development ```bash -cargo test # Run tests -cargo bench # Run benchmarks -cargo clippy # Lint -cargo fmt # Format +cargo test # Run tests (default features) +cargo test --all-features # Run tests including parallel +cargo test --no-default-features # Run tests in no_std mode +cargo bench # Run benchmarks +cargo clippy --all-features # Lint ``` ## License diff --git a/ROADMAP.md b/ROADMAP.md deleted file mode 100644 index df59ab7..0000000 --- a/ROADMAP.md +++ /dev/null @@ -1,220 +0,0 @@ -# bilby — Numerical Quadrature for Rust - -A high-performance, comprehensive numerical integration library for Rust. - -**Design principles:** -- Generic over `F: Float` (composable with echidna AD types out of the box) -- No heap allocation on hot paths where possible -- Precomputable rules: separate node/weight generation from integration -- Consistent API: all rules return `(nodes, weights)` or `Result` -- Feature-gated heavy dependencies (rayon, etc.) - -## Phase 0: Foundation - -Core types, trait design, and the first two rule families. This phase defines the API that everything else builds on. 
- -### 0.1 Project scaffolding ✅ -- `cargo init --lib`, dual MIT/Apache-2.0, MSRV 1.93 -- CI (clippy, fmt, test, MSRV check, coverage, security audit, publish workflow) -- `QuadratureRule` struct with `nodes: Vec`, `weights: Vec` -- `QuadratureError` enum (ZeroOrder, DegenerateInterval, NotConverged) - -### 0.2 Gauss-Legendre (Bogaert algorithm) ✅ -- Newton iteration on Legendre recurrence for n <= 100 (Tricomi initial guess) -- Bogaert (2014) asymptotic expansion for n > 100 (O(1) per node via Bessel zeros + Chebyshev interpolants) -- `GaussLegendre::new(n) -> Result` -- 16-digit accuracy verified against exact polynomial integration up to degree 2n-1 - -### 0.3 Gauss-Kronrod (embedded pairs) ✅ -- G7-K15, G10-K21, G15-K31, G20-K41, G25-K51 pairs -- Canonical QUADPACK coefficients (from netlib Fortran source) -- Returns `(estimate, error_estimate)` with QUADPACK error heuristic -- `GaussKronrod::new(pair)` with static coefficient slices (no allocation) - -### 0.4 Basic integration API ✅ -- `QuadratureRule::integrate(a, b, f)` — affine transform from [-1, 1] to [a, b] -- `QuadratureRule::integrate_composite(a, b, n_panels, f)` — composite rule -- `GaussKronrod::integrate(a, b, f)` — returns (estimate, error_estimate) -- 22 unit tests + 4 doc tests - -**Milestone: v0.1.0** — Gauss-Legendre + Gauss-Kronrod, fixed-order integration. - -## Phase 1: Adaptive Integration - -The workhorse feature. Most real integration problems need error-driven refinement. 
- -### 1.1 Global adaptive subdivision (QUADPACK-style) ✅ -- `BinaryHeap`-based priority queue of subintervals ordered by error estimate -- Bisects the interval with largest error, re-evaluates both halves via GK pairs -- Terminates when `error <= max(abs_tol, rel_tol * |estimate|)` or max evaluations reached -- `AdaptiveIntegrator` builder with configurable GK pair, tolerances, and eval budget -- `adaptive_integrate(f, a, b, tol)` convenience function - -### 1.2 Singularity handling ✅ -- User-specified break points via `adaptive_integrate_with_breaks(f, a, b, breaks, tol)` -- Break points seed the priority queue as separate sub-intervals for global adaptive refinement -- Validation: breaks must be within (a, b), deduplicated and sorted automatically -- Endpoint singularity detection deferred to future refinement - -### 1.3 Semi-infinite and infinite intervals ✅ -- Domain transforms: [a, ∞) via x = a + t/(1-t), (-∞, b] via x = b - t/(1-t), (-∞, ∞) via x = t/(1-t²) -- `integrate_semi_infinite_upper(f, a, tol)`, `integrate_semi_infinite_lower(f, b, tol)`, `integrate_infinite(f, tol)` -- Delegates to adaptive quadrature on the transformed finite domain -- Gauss-Laguerre/Hermite as dedicated rule families deferred to Phase 2 - -### 1.4 Error reporting ✅ -- `QuadratureResult` struct: value, error_estimate, num_evals, converged flag -- Non-convergence signalled via `converged: false` (always returns best estimate) -- `QuadratureError` simplified: `NotConverged` removed, `InvalidInput` added -- 43 unit tests + 13 doc tests - -**Milestone: v0.2.0** — Adaptive integration with error control, infinite domains. - -## Phase 2: Classical Rule Families - -Complete the Gaussian quadrature family. 
- -### 2.1 Gauss-Jacobi ✅ -- Weight function (1-x)^alpha * (1+x)^beta on [-1, 1] -- Generalises Legendre (alpha=beta=0), Chebyshev (alpha=beta=-0.5), Gegenbauer -- Golub-Welsch algorithm: eigenvalues of symmetric tridiagonal Jacobi matrix -- Handles all valid parameters including the singular Chebyshev limit (alpha+beta=-1) - -### 2.2 Gauss-Laguerre (generalised) ✅ -- Weight function x^alpha * e^(-x) on [0, inf) -- Golub-Welsch algorithm via Laguerre three-term recurrence coefficients - -### 2.3 Gauss-Hermite ✅ -- Weight function e^(-x^2) on (-inf, inf) -- Golub-Welsch algorithm via physicists' Hermite recurrence coefficients - -### 2.4 Gauss-Chebyshev (types I and II) ✅ -- Closed-form nodes and weights (no iteration needed) -- Useful as comparison/baseline and for Chebyshev interpolation - -### 2.5 Gauss-Radau and Gauss-Lobatto ✅ -- Gauss-Radau: Golub-Welsch with Radau modification of Legendre Jacobi matrix -- Gauss-Lobatto: Newton on P'_{n-1}(x) using Legendre ODE second derivative -- Left/right variants for Radau, both endpoints for Lobatto - -### 2.6 Clenshaw-Curtis ✅ -- Nodes at Chebyshev extrema cos(kπ/(n-1)), weights via explicit DCT formula -- Nested: degree doubling reuses previous evaluations -- Competitive with Gauss for smooth functions, better for adaptive refinement - -### 2.7 Golub-Welsch eigenvalue solver ✅ -- Shared `pub(crate)` module: symmetric tridiagonal QL algorithm with implicit shifts -- Radau modification via continued fraction on characteristic polynomial -- Used by Gauss-Jacobi, Gauss-Hermite, Gauss-Laguerre, and Gauss-Radau -- 98 unit tests + 21 doc tests - -**Milestone: v0.3.0** — Full classical quadrature family. 
- -## Phase 3: Multi-Dimensional Integration - -### 3.1 Tensor product rules ✅ -- `TensorProductRule::new(&[&QuadratureRule])` for mixed-order N-D rules -- `TensorProductRule::isotropic(rule, dim)` for same rule in all dimensions -- `CubatureRule` with flat node storage, `integrate()` and `integrate_box()` methods -- Simple but exponential cost (curse of dimensionality); practical for d <= 4-5 - -### 3.2 Sparse grids (Smolyak) ✅ -- Smolyak combination technique from nested Clenshaw-Curtis rules -- `SparseGrid::clenshaw_curtis(dim, level)` with quantised point merging -- Practical for moderate dimensions (d <= ~20 for smooth functions) -- Verified: correct point counts, weight sums, polynomial exactness - -### 3.3 Adaptive cubature (Genz-Malik) ✅ -- h-adaptive: `BinaryHeap`-based subdivision of worst subregion -- Genz-Malik degree-7/5 embedded rule pair for error estimation -- Fourth-difference criterion for split axis selection -- `adaptive_cubature(f, lower, upper, tol)` convenience function -- d=1 delegates to 1D adaptive integrator - -### 3.4 Monte Carlo / quasi-Monte Carlo ✅ -- Plain pseudo-random MC with Welford's online variance (Xoshiro256++ PRNG) -- Quasi-MC: Sobol sequences (gray-code enumeration, 40 dimensions, Joe-Kuo direction numbers) -- Quasi-MC: Halton sequences (radical-inverse function, 100 primes) -- `MonteCarloIntegrator` builder with `Plain`/`Sobol`/`Halton` methods -- Heuristic error estimate for QMC via N/2 vs N comparison -- 136 unit tests + 28 doc tests - -**Milestone: v0.4.0** — Multi-dimensional integration. 
- -## Phase 4: Specialty Methods - -### 4.1 Oscillatory integrals ✅ -- Filon-Clenshaw-Curtis method for ∫f(x)sin(ωx)dx and ∫f(x)cos(ωx)dx -- Chebyshev expansion of f at Clenshaw-Curtis nodes -- Modified Chebyshev moments computed via Gauss-Legendre quadrature -- Phase-shifted combination for sin/cos kernels; small-ω fallback to adaptive -- `OscillatoryIntegrator` builder + `integrate_oscillatory_sin/cos` convenience functions - -### 4.2 Double-exponential (tanh-sinh) ✅ -- Transform x = tanh(π/2·sinh(t)) converts endpoint singularities to rapid decay -- Self-adaptive via level doubling: each level halves step size, reuses all prior points -- Handles algebraic (x^{-0.75}) and logarithmic (ln x) endpoint singularities -- `TanhSinh` builder + `tanh_sinh_integrate` convenience function - -### 4.3 Cauchy principal value ✅ -- PV ∫f(x)/(x-c)dx = ∫[f(x)-f(c)]/(x-c)dx + f(c)·ln((b-c)/(c-a)) -- Subtraction technique with adaptive integration of the regularised remainder -- Automatic breakpoint at the pole via `adaptive_integrate_with_breaks` -- `CauchyPV` builder + `pv_integrate` convenience function - -### 4.4 Weighted integration ✅ -- Unified API: `∫f(x)·w(x)dx` dispatching to appropriate Gaussian rule -- Jacobi, Laguerre, Hermite, Chebyshev I/II weight families -- LogWeight w(x)=-ln(x) via Gauss-Laguerre α=1 substitution (x=e^{-t}) -- `WeightedIntegrator` builder + `weighted_integrate` convenience function -- `integrate_over(a, b, f)` for affine-mapped finite domains -- 172 unit tests + 35 doc tests - -**Milestone: v0.5.0** — Specialty quadrature. 
- -## Phase 5: Performance and Polish - -### 5.1 Parallelism (`parallel` feature) -- Parallel adaptive subdivision (evaluate subintervals concurrently) -- Parallel node/weight generation for large n -- Vectorised integrand batching (evaluate f at multiple points per call) - -### 5.2 Precomputed rule caching -- `LazyRule` / compile-time tables for common orders -- Avoid recomputing nodes/weights in hot loops - -### 5.3 `no_std` core -- Core rule computations without alloc where feasible -- Fixed-size rules via const generics - -### 5.4 Comprehensive benchmarks -- Against gauss-quad, quadrature, quad_gk -- Against SciPy/QUADPACK reference values -- Criterion benchmarks for node generation, integration, adaptive convergence - -**Milestone: v1.0.0** — Production-ready. - -## Non-Goals (at least initially) - -- ODE/PDE solvers (different domain) -- Symbolic integration -- Arbitrary-precision arithmetic (stay in f32/f64 land) -- GPU acceleration (diminishing returns for quadrature vs. the integrand evaluation itself) - -## Key Design Decisions to Make Early - -1. **Trait bounds**: `F: Float` (num-traits) vs custom trait. Leaning toward num-traits `Float` for echidna compatibility. -2. **Node/weight storage**: `Vec` vs `&[F]` vs const-generic arrays. Likely `Vec` for flexibility with a `SmallRule` const-generic variant for known small sizes. -3. **Integrand signature**: `Fn(F) -> F` vs `Fn(&[F]) -> F` for multi-dimensional. Probably separate 1D and N-D APIs rather than a unified signature. -4. **Error model**: return `(value, error)` tuple vs a `QuadratureResult` struct. Struct is more extensible (can add num_evals, converged flag later). -5. **Rule representation**: separate types per family (GaussLegendre, GaussKronrod, etc.) vs enum. Separate types with a shared trait — more ergonomic and allows family-specific methods. 
- -## References - -- Bogaert (2014) — [Iteration-Free Computation of Gauss-Legendre Quadrature Nodes and Weights](https://www.cfm.brown.edu/faculty/gk/APMA2560/Handouts/GL_quad_Bogaert_2014.pdf) -- QUADPACK — Piessens, de Doncker-Kapenga, Uberhuber, Kahaner (1983) -- [FastGaussQuadrature.jl](https://github.com/JuliaApproximation/FastGaussQuadrature.jl) — Julia reference implementation -- [cubature](https://github.com/stevengj/cubature) — Steven G. Johnson's adaptive cubature in C -- Genz & Malik (1980) — adaptive cubature rules for hypercubes -- [SciPy integrate](https://docs.scipy.org/doc/scipy/tutorial/integrate.html) — Python QUADPACK wrapper -- [sparse-grids.de](http://www.sparse-grids.de/) — Sparse grid reference tables and theory diff --git a/benches/cubature.rs b/benches/cubature.rs new file mode 100644 index 0000000..344995d --- /dev/null +++ b/benches/cubature.rs @@ -0,0 +1,130 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::cubature::{ + adaptive_cubature, monte_carlo_integrate, MCMethod, MonteCarloIntegrator, SparseGrid, + TensorProductRule, +}; +use bilby::GaussLegendre; + +fn bench_tensor_product(c: &mut Criterion) { + let mut group = c.benchmark_group("tensor_product"); + + // d=2, n=10 + let gl10 = GaussLegendre::new(10).unwrap(); + let tp_2d = TensorProductRule::isotropic(gl10.rule(), 2).unwrap(); + group.bench_function("d2_n10", |b| { + b.iter(|| { + tp_2d + .rule() + .integrate_box(&[0.0, 0.0], &[1.0, 1.0], |x| x[0] * x[1]) + }); + }); + + // d=3, n=5 + let gl5 = GaussLegendre::new(5).unwrap(); + let tp_3d = TensorProductRule::isotropic(gl5.rule(), 3).unwrap(); + group.bench_function("d3_n5", |b| { + b.iter(|| { + tp_3d + .rule() + .integrate_box(&[0.0, 0.0, 0.0], &[1.0, 1.0, 1.0], |x| x[0] * x[1] * x[2]) + }); + }); + + group.finish(); +} + +fn bench_sparse_grid(c: &mut Criterion) { + let mut group = c.benchmark_group("sparse_grid"); + + let sg_3_3 = SparseGrid::clenshaw_curtis(3, 3).unwrap(); + 
group.bench_function("d3_l3", |b| { + b.iter(|| { + sg_3_3 + .rule() + .integrate_box(&[0.0; 3], &[1.0; 3], |x| (x[0] + x[1] + x[2]).exp()) + }); + }); + + let sg_5_3 = SparseGrid::clenshaw_curtis(5, 3).unwrap(); + group.bench_function("d5_l3", |b| { + b.iter(|| { + sg_5_3.rule().integrate_box(&[0.0; 5], &[1.0; 5], |x| { + (x[0] + x[1] + x[2] + x[3] + x[4]).exp() + }) + }); + }); + + group.finish(); +} + +fn bench_adaptive_cubature(c: &mut Criterion) { + let mut group = c.benchmark_group("adaptive_cubature"); + + group.bench_function("d2_gaussian", |b| { + b.iter(|| { + adaptive_cubature( + |x| (-x[0] * x[0] - x[1] * x[1]).exp(), + &[0.0, 0.0], + &[1.0, 1.0], + 1e-6, + ) + .unwrap() + }); + }); + + group.bench_function("d3_polynomial", |b| { + b.iter(|| { + adaptive_cubature( + |x| x[0] * x[0] + x[1] * x[1] + x[2] * x[2], + &[0.0, 0.0, 0.0], + &[1.0, 1.0, 1.0], + 1e-8, + ) + .unwrap() + }); + }); + + group.finish(); +} + +fn bench_monte_carlo(c: &mut Criterion) { + let mut group = c.benchmark_group("monte_carlo"); + + let f = |x: &[f64]| (x[0] * x[1] * x[2]).exp(); + let lower = [0.0; 3]; + let upper = [1.0; 3]; + let n = 10_000; + + for (name, method) in [ + ("plain", MCMethod::Plain), + ("sobol", MCMethod::Sobol), + ("halton", MCMethod::Halton), + ] { + group.bench_with_input(BenchmarkId::new("3d_exp", name), &method, |b, &method| { + b.iter(|| { + MonteCarloIntegrator::default() + .with_method(method) + .with_samples(n) + .integrate(&lower, &upper, &f) + .unwrap() + }); + }); + } + + // Also benchmark the convenience function + group.bench_function("sobol_convenience_10k", |b| { + b.iter(|| monte_carlo_integrate(&f, &lower, &upper, n).unwrap()); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_tensor_product, + bench_sparse_grid, + bench_adaptive_cubature, + bench_monte_carlo, +); +criterion_main!(benches); diff --git a/benches/integration_1d.rs b/benches/integration_1d.rs new file mode 100644 index 0000000..124559a --- /dev/null +++ 
b/benches/integration_1d.rs @@ -0,0 +1,110 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::{ + adaptive_integrate, tanh_sinh_integrate, GKPair, GaussKronrod, GaussLegendre, + OscillatoryIntegrator, OscillatoryKernel, +}; + +fn bench_fixed_order_gl(c: &mut Criterion) { + let mut group = c.benchmark_group("fixed_gl_sin"); + for n in [10, 20, 50] { + let gl = GaussLegendre::new(n).unwrap(); + group.bench_with_input(BenchmarkId::from_parameter(n), &gl, |b, gl| { + b.iter(|| { + gl.rule() + .integrate(0.0, std::f64::consts::PI, |x: f64| x.sin()) + }); + }); + } + group.finish(); +} + +fn bench_composite_gl(c: &mut Criterion) { + let gl = GaussLegendre::new(10).unwrap(); + let mut group = c.benchmark_group("composite_gl_sin"); + for panels in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::from_parameter(panels), + &panels, + |b, &panels| { + b.iter(|| { + gl.rule() + .integrate_composite(0.0, std::f64::consts::PI, panels, |x: f64| x.sin()) + }); + }, + ); + } + group.finish(); +} + +fn bench_gk_pairs(c: &mut Criterion) { + let mut group = c.benchmark_group("gk_sin"); + for (name, pair) in [("G7K15", GKPair::G7K15), ("G15K31", GKPair::G15K31)] { + let gk = GaussKronrod::new(pair); + group.bench_function(name, |b| { + b.iter(|| gk.integrate(0.0, std::f64::consts::PI, |x: f64| x.sin())); + }); + } + group.finish(); +} + +fn bench_adaptive(c: &mut Criterion) { + let mut group = c.benchmark_group("adaptive"); + group.bench_function("smooth_sin", |b| { + b.iter(|| adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap()); + }); + group.bench_function("peaked", |b| { + b.iter(|| { + adaptive_integrate( + |x: f64| 1.0 / (1.0 + (x - 0.3).powi(2) * 1e4), + 0.0, + 1.0, + 1e-8, + ) + .unwrap() + }); + }); + group.bench_function("singular_sqrt", |b| { + b.iter(|| adaptive_integrate(|x: f64| 1.0 / x.sqrt(), 1e-15, 1.0, 1e-8).unwrap()); + }); + group.finish(); +} + +fn bench_tanh_sinh(c: &mut Criterion) { + 
let mut group = c.benchmark_group("tanh_sinh"); + group.bench_function("smooth_sin", |b| { + b.iter(|| tanh_sinh_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap()); + }); + group.bench_function("singular_sqrt", |b| { + b.iter(|| tanh_sinh_integrate(|x: f64| 1.0 / x.sqrt(), 0.0, 1.0, 1e-8).unwrap()); + }); + group.finish(); +} + +fn bench_oscillatory(c: &mut Criterion) { + let mut group = c.benchmark_group("oscillatory"); + for omega in [10.0, 100.0] { + group.bench_with_input( + BenchmarkId::new("sin", omega as u64), + &omega, + |b, &omega| { + let integrator = OscillatoryIntegrator::new(OscillatoryKernel::Sine, omega) + .with_order(64) + .with_abs_tol(1e-10); + b.iter(|| integrator.integrate(0.0, 1.0, |_| 1.0).unwrap()); + }, + ); + } + group.finish(); +} + +criterion_group!( + benches, + bench_fixed_order_gl, + bench_composite_gl, + bench_gk_pairs, + bench_adaptive, + bench_tanh_sinh, + bench_oscillatory, +); +criterion_main!(benches); diff --git a/benches/node_generation.rs b/benches/node_generation.rs new file mode 100644 index 0000000..fa43490 --- /dev/null +++ b/benches/node_generation.rs @@ -0,0 +1,56 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; + +use bilby::{ClenshawCurtis, GaussHermite, GaussJacobi, GaussLaguerre, GaussLegendre}; + +fn bench_gl_newton(c: &mut Criterion) { + let mut group = c.benchmark_group("gl_newton"); + for n in [10, 50, 100] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| GaussLegendre::new(n).unwrap()); + }); + } + group.finish(); +} + +fn bench_gl_bogaert(c: &mut Criterion) { + let mut group = c.benchmark_group("gl_bogaert"); + for n in [200, 1_000, 10_000, 100_000] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| GaussLegendre::new(n).unwrap()); + }); + } + group.finish(); +} + +fn bench_golub_welsch(c: &mut Criterion) { + let mut group = c.benchmark_group("golub_welsch"); + 
group.bench_function("jacobi_n50", |b| { + b.iter(|| GaussJacobi::new(50, 0.5, 0.5).unwrap()); + }); + group.bench_function("hermite_n50", |b| { + b.iter(|| GaussHermite::new(50).unwrap()); + }); + group.bench_function("laguerre_n50", |b| { + b.iter(|| GaussLaguerre::new(50, 0.0).unwrap()); + }); + group.finish(); +} + +fn bench_clenshaw_curtis(c: &mut Criterion) { + let mut group = c.benchmark_group("clenshaw_curtis"); + for n in [33, 65, 129] { + group.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| { + b.iter(|| ClenshawCurtis::new(n).unwrap()); + }); + } + group.finish(); +} + +criterion_group!( + benches, + bench_gl_newton, + bench_gl_bogaert, + bench_golub_welsch, + bench_clenshaw_curtis, +); +criterion_main!(benches); diff --git a/src/adaptive.rs b/src/adaptive.rs index b466fb6..124258d 100644 --- a/src/adaptive.rs +++ b/src/adaptive.rs @@ -10,12 +10,18 @@ //! ``` //! use bilby::adaptive_integrate; //! -//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap(); +//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, core::f64::consts::PI, 1e-10).unwrap(); //! assert!((result.value - 2.0).abs() < 1e-10); //! assert!(result.is_converged()); //! ``` -use std::cmp::Ordering; +use core::cmp::Ordering; + +#[cfg(not(feature = "std"))] +use alloc::collections::BinaryHeap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(feature = "std")] use std::collections::BinaryHeap; use crate::error::QuadratureError; @@ -372,7 +378,7 @@ mod tests { /// Integral of sin(x) over [0, pi] = 2. #[test] fn sin_integral() { - let r = adaptive_integrate(f64::sin, 0.0, std::f64::consts::PI, 1e-12).unwrap(); + let r = adaptive_integrate(f64::sin, 0.0, core::f64::consts::PI, 1e-12).unwrap(); assert!(r.converged); assert!((r.value - 2.0).abs() < 1e-12, "value={}", r.value); } @@ -389,8 +395,8 @@ mod tests { /// Reversed bounds should negate the result. 
#[test] fn reversed_bounds() { - let forward = adaptive_integrate(f64::sin, 0.0, std::f64::consts::PI, 1e-12).unwrap(); - let reverse = adaptive_integrate(f64::sin, std::f64::consts::PI, 0.0, 1e-12).unwrap(); + let forward = adaptive_integrate(f64::sin, 0.0, core::f64::consts::PI, 1e-12).unwrap(); + let reverse = adaptive_integrate(f64::sin, core::f64::consts::PI, 0.0, 1e-12).unwrap(); assert!( (forward.value + reverse.value).abs() < 1e-12, "forward={}, reverse={}", @@ -450,7 +456,7 @@ mod tests { .with_abs_tol(1e-14) .with_rel_tol(1e-14) .with_max_evals(100_000) - .integrate(0.0, std::f64::consts::PI, f64::sin) + .integrate(0.0, core::f64::consts::PI, f64::sin) .unwrap(); assert!(r.converged, "err={}", r.error_estimate); assert!((r.value - 2.0).abs() < 1e-14, "value={}", r.value); diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..139abce --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,36 @@ +//! Precomputed quadrature rule cache. +//! +//! Provides lazily-initialised, globally-shared instances of common +//! Gauss-Legendre rules. Rules are computed once on first access and +//! reused for the lifetime of the process. +//! +//! # Example +//! +//! ``` +//! use bilby::cache::GL10; +//! +//! let result = GL10.rule().integrate(0.0, 1.0, |x: f64| x * x); +//! assert!((result - 1.0 / 3.0).abs() < 1e-14); +//! ``` + +use std::sync::LazyLock; + +use crate::gauss_legendre::GaussLegendre; + +/// Precomputed 5-point Gauss-Legendre rule. +pub static GL5: LazyLock = LazyLock::new(|| GaussLegendre::new(5).unwrap()); + +/// Precomputed 10-point Gauss-Legendre rule. +pub static GL10: LazyLock = LazyLock::new(|| GaussLegendre::new(10).unwrap()); + +/// Precomputed 15-point Gauss-Legendre rule. +pub static GL15: LazyLock = LazyLock::new(|| GaussLegendre::new(15).unwrap()); + +/// Precomputed 20-point Gauss-Legendre rule. +pub static GL20: LazyLock = LazyLock::new(|| GaussLegendre::new(20).unwrap()); + +/// Precomputed 50-point Gauss-Legendre rule. 
+pub static GL50: LazyLock = LazyLock::new(|| GaussLegendre::new(50).unwrap()); + +/// Precomputed 100-point Gauss-Legendre rule. +pub static GL100: LazyLock = LazyLock::new(|| GaussLegendre::new(100).unwrap()); diff --git a/src/cauchy_pv.rs b/src/cauchy_pv.rs index c45ed71..d6a6e80 100644 --- a/src/cauchy_pv.rs +++ b/src/cauchy_pv.rs @@ -18,6 +18,9 @@ use crate::adaptive::AdaptiveIntegrator; use crate::error::QuadratureError; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Builder for Cauchy principal value integration. /// /// Computes PV ∫ₐᵇ f(x)/(x-c) dx via the subtraction technique: diff --git a/src/clenshaw_curtis.rs b/src/clenshaw_curtis.rs index ae1a9c1..f6a5971 100644 --- a/src/clenshaw_curtis.rs +++ b/src/clenshaw_curtis.rs @@ -12,6 +12,11 @@ use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Clenshaw-Curtis quadrature rule on \[-1, 1\]. /// /// # Example @@ -45,21 +50,25 @@ impl ClenshawCurtis { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -70,7 +79,7 @@ impl ClenshawCurtis { /// For n == 1: midpoint rule (x=0, w=2). /// For n >= 2: nodes at x_k = cos(k π / (n-1)), weights via explicit formula. 
fn compute_clenshaw_curtis(n: usize) -> (Vec, Vec) { - use std::f64::consts::PI; + use core::f64::consts::PI; if n == 1 { return (vec![0.0], vec![2.0]); @@ -209,7 +218,7 @@ mod tests { #[test] fn sin_integration() { let cc = ClenshawCurtis::new(21).unwrap(); - let result = cc.rule().integrate(0.0, std::f64::consts::PI, f64::sin); + let result = cc.rule().integrate(0.0, core::f64::consts::PI, f64::sin); assert!((result - 2.0).abs() < 1e-12, "result={result}"); } diff --git a/src/cubature/adaptive.rs b/src/cubature/adaptive.rs index b4d2da0..ddd4549 100644 --- a/src/cubature/adaptive.rs +++ b/src/cubature/adaptive.rs @@ -7,7 +7,15 @@ //! Practical for dimensions d ≤ ~7 (the rule uses 2^d vertex evaluations). //! For d = 1, delegates to the 1D adaptive integrator. -use std::cmp::Ordering; +use core::cmp::Ordering; + +#[cfg(not(feature = "std"))] +use alloc::collections::BinaryHeap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; +#[cfg(feature = "std")] use std::collections::BinaryHeap; use crate::error::QuadratureError; @@ -426,7 +434,7 @@ mod tests { #[test] fn delegates_to_1d() { let result = - adaptive_cubature(|x| x[0].sin(), &[0.0], &[std::f64::consts::PI], 1e-10).unwrap(); + adaptive_cubature(|x| x[0].sin(), &[0.0], &[core::f64::consts::PI], 1e-10).unwrap(); assert!((result.value - 2.0).abs() < 1e-10, "value={}", result.value); } diff --git a/src/cubature/halton.rs b/src/cubature/halton.rs index 0e520b8..38b20d4 100644 --- a/src/cubature/halton.rs +++ b/src/cubature/halton.rs @@ -5,6 +5,9 @@ use crate::error::QuadratureError; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; + /// First 100 primes for Halton sequence bases. 
const PRIMES: [u32; 100] = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, @@ -93,6 +96,9 @@ fn radical_inverse(mut n: u64, base: u32) -> f64 { mod tests { use super::*; + #[cfg(not(feature = "std"))] + use alloc::vec; + #[test] fn first_few_points() { let mut hal = HaltonSequence::new(2).unwrap(); diff --git a/src/cubature/mod.rs b/src/cubature/mod.rs index 18b0e4c..2d583c2 100644 --- a/src/cubature/mod.rs +++ b/src/cubature/mod.rs @@ -38,6 +38,9 @@ pub use monte_carlo::{monte_carlo_integrate, MCMethod, MonteCarloIntegrator}; pub use sparse_grid::{SparseGrid, SparseGridBasis}; pub use tensor::TensorProductRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// A precomputed multi-dimensional cubature rule. /// /// Nodes are stored in a flat layout: node `i` occupies @@ -69,26 +72,31 @@ impl CubatureRule { } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.weights.len() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.dim } /// Access the i-th node as a slice of length `dim`. + #[inline] pub fn node(&self, i: usize) -> &[f64] { &self.nodes[i * self.dim..(i + 1) * self.dim] } /// The weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.weights } /// Integrate `f` over the reference domain (assumes nodes are on \[-1, 1\]^d). + #[inline] pub fn integrate(&self, f: G) -> f64 where G: Fn(&[f64]) -> f64, @@ -126,6 +134,54 @@ impl CubatureRule { } sum * jacobian } + + /// Parallel integration over the reference domain \[-1, 1\]^d. + /// + /// Identical to [`integrate`](Self::integrate) but evaluates points in parallel. 
+ #[cfg(feature = "parallel")] + pub fn integrate_par(&self, f: G) -> f64 + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + let d = self.dim; + (0..self.num_points()) + .into_par_iter() + .map(|i| self.weights[i] * f(&self.nodes[i * d..(i + 1) * d])) + .sum() + } + + /// Parallel integration over the hyperrectangle \[lower, upper\]. + /// + /// Identical to [`integrate_box`](Self::integrate_box) but evaluates points in parallel. + #[cfg(feature = "parallel")] + pub fn integrate_box_par(&self, lower: &[f64], upper: &[f64], f: G) -> f64 + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + assert_eq!(lower.len(), self.dim); + assert_eq!(upper.len(), self.dim); + + let d = self.dim; + let half_widths: Vec = (0..d).map(|j| 0.5 * (upper[j] - lower[j])).collect(); + let midpoints: Vec = (0..d).map(|j| 0.5 * (lower[j] + upper[j])).collect(); + let jacobian: f64 = half_widths.iter().product(); + + let sum: f64 = (0..self.num_points()) + .into_par_iter() + .map(|i| { + let node = &self.nodes[i * d..(i + 1) * d]; + let x: Vec = (0..d) + .map(|j| half_widths[j] * node[j] + midpoints[j]) + .collect(); + self.weights[i] * f(&x) + }) + .sum(); + sum * jacobian + } } #[cfg(test)] @@ -149,4 +205,34 @@ mod tests { let result = rule.integrate_box(&[0.0, 0.0], &[2.0, 3.0], |_| 1.0); assert!((result - 6.0).abs() < 1e-14); // area = 2*3 = 6 } + + /// Parallel integrate matches sequential on reference domain. + #[cfg(feature = "parallel")] + #[test] + fn integrate_par_matches_sequential() { + use crate::cubature::TensorProductRule; + use crate::GaussLegendre; + + let gl = GaussLegendre::new(10).unwrap(); + let tp = TensorProductRule::isotropic(gl.rule(), 3).unwrap(); + let f = |x: &[f64]| (x[0] * x[1] + x[2]).sin(); + let seq = tp.rule().integrate(&f); + let par = tp.rule().integrate_par(&f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } + + /// Parallel integrate_box matches sequential. 
+ #[cfg(feature = "parallel")] + #[test] + fn integrate_box_par_matches_sequential() { + use crate::cubature::TensorProductRule; + use crate::GaussLegendre; + + let gl = GaussLegendre::new(10).unwrap(); + let tp = TensorProductRule::isotropic(gl.rule(), 2).unwrap(); + let f = |x: &[f64]| x[0] * x[1]; + let seq = tp.rule().integrate_box(&[0.0, 0.0], &[1.0, 1.0], &f); + let par = tp.rule().integrate_box_par(&[0.0, 0.0], &[1.0, 1.0], &f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } } diff --git a/src/cubature/monte_carlo.rs b/src/cubature/monte_carlo.rs index d7f6962..adca729 100644 --- a/src/cubature/monte_carlo.rs +++ b/src/cubature/monte_carlo.rs @@ -8,6 +8,11 @@ use crate::cubature::sobol::SobolSequence; use crate::error::QuadratureError; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Monte Carlo integration method. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum MCMethod { @@ -243,6 +248,222 @@ impl MonteCarloIntegrator { converged: true, }) } + + /// Parallel Monte Carlo integration over the hyperrectangle \[lower, upper\]. + /// + /// For Sobol and Halton methods, all sample points are pre-generated + /// sequentially (the sequences are inherently serial), then function + /// evaluations are parallelised. For Plain MC, the sample space is split + /// into independent chunks with separate PRNGs, giving statistically + /// equivalent but numerically different results compared to sequential. 
+ #[cfg(feature = "parallel")] + pub fn integrate_par( + &self, + lower: &[f64], + upper: &[f64], + f: G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + let d = lower.len(); + if d == 0 || upper.len() != d { + return Err(QuadratureError::InvalidInput( + "lower and upper must have equal nonzero length", + )); + } + if self.num_samples == 0 { + return Err(QuadratureError::InvalidInput( + "number of samples must be >= 1", + )); + } + + let widths: Vec = (0..d).map(|j| upper[j] - lower[j]).collect(); + let volume: f64 = widths.iter().product(); + let n = self.num_samples; + + match self.method { + MCMethod::Plain => self.integrate_plain_par(d, lower, &widths, volume, n, &f), + MCMethod::Sobol => self.integrate_qmc_par_sobol(d, lower, &widths, volume, n, &f), + MCMethod::Halton => self.integrate_qmc_par_halton(d, lower, &widths, volume, n, &f), + } + } + + #[cfg(feature = "parallel")] + fn integrate_plain_par( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + let num_chunks = rayon::current_num_threads().max(1); + let chunk_size = n / num_chunks; + let remainder = n % num_chunks; + + // Each chunk uses an independent PRNG seeded from self.seed + chunk_id + let chunk_results: Vec<(f64, f64, usize)> = (0..num_chunks) + .into_par_iter() + .map(|chunk_id| { + let my_n = chunk_size + if chunk_id < remainder { 1 } else { 0 }; + if my_n == 0 { + return (0.0, 0.0, 0); + } + + let mut rng = Xoshiro256pp::new(self.seed.wrapping_add(chunk_id as u64)); + let mut x = vec![0.0; d]; + let mut mean = 0.0; + let mut m2 = 0.0; + + for i in 1..=my_n { + for j in 0..d { + x[j] = lower[j] + widths[j] * rng.next_f64(); + } + let val = f(&x); + let delta = val - mean; + mean += delta / i as f64; + let delta2 = val - mean; + m2 += delta * delta2; + } + + (mean * my_n as f64, m2, my_n) + }) + .collect(); + + // Merge 
chunk results + let mut total_sum = 0.0; + let mut total_m2 = 0.0; + let mut total_n = 0usize; + for (sum, m2, count) in chunk_results { + total_sum += sum; + total_m2 += m2; + total_n += count; + } + + let mean = total_sum / total_n as f64; + let variance = if total_n > 1 { + total_m2 / (total_n - 1) as f64 + } else { + 0.0 + }; + let std_error = (variance / total_n as f64).sqrt() * volume; + + Ok(QuadratureResult { + value: volume * mean, + error_estimate: std_error, + num_evals: total_n, + converged: true, + }) + } + + #[cfg(feature = "parallel")] + fn integrate_qmc_par_sobol( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + // Pre-generate all Sobol points (sequential — gray-code is inherently serial) + let mut sob = SobolSequence::new(d)?; + let mut points = vec![0.0; n * d]; + let mut u = vec![0.0; d]; + for i in 0..n { + sob.next_point(&mut u); + for j in 0..d { + points[i * d + j] = lower[j] + widths[j] * u[j]; + } + } + + // Parallel evaluation + let sum: f64 = (0..n) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + + let estimate = volume * sum / n as f64; + + // Heuristic error: compare N/2 and N estimates + let half = n / 2; + let half_sum: f64 = (0..half) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + let half_estimate = volume * half_sum / half as f64; + let error = (estimate - half_estimate).abs(); + + Ok(QuadratureResult { + value: estimate, + error_estimate: error, + num_evals: n + half, + converged: true, + }) + } + + #[cfg(feature = "parallel")] + fn integrate_qmc_par_halton( + &self, + d: usize, + lower: &[f64], + widths: &[f64], + volume: f64, + n: usize, + f: &G, + ) -> Result, QuadratureError> + where + G: Fn(&[f64]) -> f64 + Sync, + { + use rayon::prelude::*; + + // Pre-generate all Halton points + let mut hal = HaltonSequence::new(d)?; + let mut 
points = vec![0.0; n * d]; + let mut u = vec![0.0; d]; + for i in 0..n { + hal.next_point(&mut u); + for j in 0..d { + points[i * d + j] = lower[j] + widths[j] * u[j]; + } + } + + // Parallel evaluation + let sum: f64 = (0..n) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + + let estimate = volume * sum / n as f64; + + // Heuristic error + let half = n / 2; + let half_sum: f64 = (0..half) + .into_par_iter() + .map(|i| f(&points[i * d..(i + 1) * d])) + .sum(); + let half_estimate = volume * half_sum / half as f64; + let error = (estimate - half_estimate).abs(); + + Ok(QuadratureResult { + value: estimate, + error_estimate: error, + num_evals: n + half, + converged: true, + }) + } } /// Convenience: quasi-Monte Carlo integration using Sobol sequences. @@ -379,4 +600,55 @@ mod tests { // integral of (x+y) over [0,1]^2 = 1 assert!((result.value - 1.0).abs() < 0.05, "value={}", result.value); } + + /// Parallel Sobol QMC produces identical results (deterministic sequence). + #[cfg(feature = "parallel")] + #[test] + fn sobol_par_matches_sequential() { + let f = |x: &[f64]| (x[0] + x[1] + x[2]).exp(); + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Sobol) + .with_samples(5000); + let seq = integrator.integrate(&[0.0; 3], &[1.0; 3], &f).unwrap(); + let par = integrator.integrate_par(&[0.0; 3], &[1.0; 3], &f).unwrap(); + assert!( + (seq.value - par.value).abs() < 1e-12, + "seq={}, par={}", + seq.value, + par.value + ); + } + + /// Parallel Halton QMC produces identical results (deterministic sequence). 
+ #[cfg(feature = "parallel")] + #[test] + fn halton_par_matches_sequential() { + let f = |x: &[f64]| x[0] * x[1]; + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Halton) + .with_samples(5000); + let seq = integrator.integrate(&[0.0, 0.0], &[1.0, 1.0], &f).unwrap(); + let par = integrator + .integrate_par(&[0.0, 0.0], &[1.0, 1.0], &f) + .unwrap(); + assert!( + (seq.value - par.value).abs() < 1e-12, + "seq={}, par={}", + seq.value, + par.value + ); + } + + /// Parallel plain MC produces reasonable results (different PRNG split, so not identical). + #[cfg(feature = "parallel")] + #[test] + fn plain_mc_par_reasonable() { + let integrator = MonteCarloIntegrator::default() + .with_method(MCMethod::Plain) + .with_samples(10000); + let result = integrator + .integrate_par(&[0.0, 0.0], &[1.0, 1.0], |x| x[0] * x[1]) + .unwrap(); + assert!((result.value - 0.25).abs() < 0.05, "value={}", result.value); + } } diff --git a/src/cubature/sobol.rs b/src/cubature/sobol.rs index 69afc2f..3a2c551 100644 --- a/src/cubature/sobol.rs +++ b/src/cubature/sobol.rs @@ -7,6 +7,9 @@ use crate::error::QuadratureError; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// Number of bits used for Sobol sequence generation. const BITS: u32 = 32; diff --git a/src/cubature/sparse_grid.rs b/src/cubature/sparse_grid.rs index e67b12d..a168df2 100644 --- a/src/cubature/sparse_grid.rs +++ b/src/cubature/sparse_grid.rs @@ -7,7 +7,15 @@ //! //! Uses Clenshaw-Curtis rules as the nested 1D basis (the canonical choice). -use std::collections::HashMap; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + +#[cfg(not(feature = "std"))] +use alloc::collections::BTreeMap; +#[cfg(feature = "std")] +use std::collections::BTreeMap; use crate::cubature::CubatureRule; use crate::error::QuadratureError; @@ -59,21 +67,25 @@ impl SparseGrid { } /// Returns a reference to the underlying cubature rule. 
+ #[inline] pub fn rule(&self) -> &CubatureRule { &self.rule } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.rule.num_points() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.rule.dim() } /// Smolyak level. + #[inline] pub fn level(&self) -> usize { self.level } @@ -101,7 +113,7 @@ fn cc_rule(n: usize) -> (Vec, Vec) { let nm1 = n - 1; let nm1_f = nm1 as f64; - let pi = std::f64::consts::PI; + let pi = core::f64::consts::PI; // Nodes: cos(k*pi/(n-1)) for k=0..n-1, reversed to ascending order let nodes: Vec = (0..n) @@ -152,9 +164,9 @@ fn build_smolyak(dim: usize, level: usize) -> CubatureRule { let cc_rules: Vec<(Vec, Vec)> = (0..=max_level).map(|l| cc_rule(cc_order(l))).collect(); - // Accumulate points via HashMap for exact merging + // Accumulate points via BTreeMap for exact merging // Key: quantised d-dimensional point - let mut point_map: HashMap, (Vec, f64)> = HashMap::new(); + let mut point_map: BTreeMap, (Vec, f64)> = BTreeMap::new(); // Enumerate all multi-indices l = (l_0, ..., l_{d-1}) with each l_j >= 0 // and |l| in [max(level, dim) - dim, level] @@ -237,8 +249,11 @@ fn build_smolyak(dim: usize, level: usize) -> CubatureRule { pairs.sort_by(|a, b| { a.0.iter() .zip(b.0.iter()) - .find_map(|(x, y)| x.partial_cmp(y).filter(|o| *o != std::cmp::Ordering::Equal)) - .unwrap_or(std::cmp::Ordering::Equal) + .find_map(|(x, y)| { + x.partial_cmp(y) + .filter(|o| *o != core::cmp::Ordering::Equal) + }) + .unwrap_or(core::cmp::Ordering::Equal) }); let n = pairs.len(); @@ -395,7 +410,7 @@ mod tests { (x[0] + x[1] + x[2]).exp() }); // Exact: (e-1)^3 ≈ 5.07321... 
- let e_minus_1 = std::f64::consts::E - 1.0; + let e_minus_1 = core::f64::consts::E - 1.0; let expected = e_minus_1 * e_minus_1 * e_minus_1; assert!( (result - expected).abs() < 1e-6, diff --git a/src/cubature/tensor.rs b/src/cubature/tensor.rs index a8aae15..ceceb39 100644 --- a/src/cubature/tensor.rs +++ b/src/cubature/tensor.rs @@ -7,6 +7,9 @@ use crate::cubature::CubatureRule; use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + /// A tensor-product cubature rule formed from 1D quadrature rules. /// /// # Example @@ -80,16 +83,19 @@ impl TensorProductRule { } /// Returns a reference to the underlying cubature rule. + #[inline] pub fn rule(&self) -> &CubatureRule { &self.rule } /// Number of cubature points. + #[inline] pub fn num_points(&self) -> usize { self.rule.num_points() } /// Spatial dimension. + #[inline] pub fn dim(&self) -> usize { self.rule.dim() } diff --git a/src/error.rs b/src/error.rs index f2528f4..69fe2c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -27,4 +27,5 @@ impl fmt::Display for QuadratureError { } } +#[cfg(feature = "std")] impl std::error::Error for QuadratureError {} diff --git a/src/gauss_chebyshev.rs b/src/gauss_chebyshev.rs index 4c23baa..3f5b2ad 100644 --- a/src/gauss_chebyshev.rs +++ b/src/gauss_chebyshev.rs @@ -10,11 +10,16 @@ //! - Nodes: `x_k = cos(k π / (n + 1))`, k = 1, ..., n //! - Weights: `w_k = π / (n + 1) * sin²(k π / (n + 1))` -use std::f64::consts::PI; +use core::f64::consts::PI; use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Gauss-Chebyshev Type I quadrature rule. /// /// Weight function `w(x) = 1 / sqrt(1 - x²)` on \[-1, 1\]. 
@@ -30,7 +35,7 @@ use crate::rule::QuadratureRule; /// assert_eq!(gc.order(), 10); /// // All weights equal pi/n for Type I /// for &w in gc.weights() { -/// assert!((w - std::f64::consts::PI / 10.0).abs() < 1e-14); +/// assert!((w - core::f64::consts::PI / 10.0).abs() < 1e-14); /// } /// ``` #[derive(Debug, Clone)] @@ -65,21 +70,25 @@ impl GaussChebyshevFirstKind { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -130,21 +139,25 @@ impl GaussChebyshevSecondKind { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } diff --git a/src/gauss_hermite.rs b/src/gauss_hermite.rs index 60eba34..62eceff 100644 --- a/src/gauss_hermite.rs +++ b/src/gauss_hermite.rs @@ -9,6 +9,11 @@ use crate::error::QuadratureError; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Hermite quadrature rule. /// /// Integrates `f(x) * e^(-x²)` over (-∞, ∞) using n points. 
@@ -21,7 +26,7 @@ use crate::rule::QuadratureRule; /// // Integral of e^(-x^2) over (-inf, inf) = sqrt(pi) /// let gh = GaussHermite::new(10).unwrap(); /// let result: f64 = gh.nodes().iter().zip(gh.weights()).map(|(_, &w)| w).sum(); -/// assert!((result - std::f64::consts::PI.sqrt()).abs() < 1e-12); +/// assert!((result - core::f64::consts::PI.sqrt()).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct GaussHermite { @@ -42,21 +47,25 @@ impl GaussHermite { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on (-∞, ∞). + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -72,7 +81,7 @@ impl GaussHermite { fn compute_hermite(n: usize) -> (Vec, Vec) { let diag = vec![0.0; n]; let off_diag_sq: Vec = (1..n).map(|k| k as f64 / 2.0).collect(); - let mu0 = std::f64::consts::PI.sqrt(); + let mu0 = core::f64::consts::PI.sqrt(); golub_welsch(&diag, &off_diag_sq, mu0) } @@ -80,7 +89,7 @@ fn compute_hermite(n: usize) -> (Vec, Vec) { #[cfg(test)] mod tests { use super::*; - use std::f64::consts::PI; + use core::f64::consts::PI; #[test] fn zero_order() { diff --git a/src/gauss_jacobi.rs b/src/gauss_jacobi.rs index c2534e1..aa3d33c 100644 --- a/src/gauss_jacobi.rs +++ b/src/gauss_jacobi.rs @@ -16,6 +16,11 @@ use crate::error::QuadratureError; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Jacobi quadrature rule. 
/// /// # Example @@ -30,7 +35,7 @@ use crate::rule::QuadratureRule; /// // Weight sum = integral of (1-x)^alpha * (1+x)^beta over [-1,1] /// // For alpha=beta=0.5: B(1.5, 1.5) * 2^2 = pi/2 /// let sum: f64 = gj.weights().iter().sum(); -/// assert!((sum - std::f64::consts::PI / 2.0).abs() < 1e-12); +/// assert!((sum - core::f64::consts::PI / 2.0).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct GaussJacobi { @@ -72,21 +77,25 @@ impl GaussJacobi { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } @@ -144,7 +153,7 @@ fn compute_jacobi(n: usize, alpha: f64, beta: f64) -> (Vec, Vec) { .collect(); // μ₀ = 2^(ab+1) Γ(α+1)Γ(β+1) / Γ(ab+2) - let mu0 = ((ab + 1.0) * std::f64::consts::LN_2 + ln_gamma(alpha + 1.0) + ln_gamma(beta + 1.0) + let mu0 = ((ab + 1.0) * core::f64::consts::LN_2 + ln_gamma(alpha + 1.0) + ln_gamma(beta + 1.0) - ln_gamma(ab + 2.0)) .exp(); @@ -173,7 +182,7 @@ pub(crate) fn ln_gamma(x: f64) -> f64 { if x < 0.5 { // Reflection formula let z = 1.0 - x; - std::f64::consts::PI.ln() - (std::f64::consts::PI * x).sin().ln() - ln_gamma(z) + core::f64::consts::PI.ln() - (core::f64::consts::PI * x).sin().ln() - ln_gamma(z) } else { let z = x - 1.0; let mut sum = COEFF[0]; @@ -181,14 +190,14 @@ pub(crate) fn ln_gamma(x: f64) -> f64 { sum += c / (z + i as f64); } let t = z + 7.5; // g + 0.5 - 0.5 * (2.0 * std::f64::consts::PI).ln() + (t.ln() * (z + 0.5)) - t + sum.ln() + 0.5 * (2.0 * core::f64::consts::PI).ln() + (t.ln() * (z + 0.5)) - t + sum.ln() } } #[cfg(test)] mod tests { use super::*; - use std::f64::consts::PI; + use core::f64::consts::PI; #[test] fn zero_order() { @@ -231,7 +240,7 
@@ mod tests { /// Helper: integral of (1-x)^a (1+x)^b over [-1,1]. fn jacobi_integral(a: f64, b: f64) -> f64 { let log_val = - (a + b + 1.0) * std::f64::consts::LN_2 + ln_gamma(a + 1.0) + ln_gamma(b + 1.0) + (a + b + 1.0) * core::f64::consts::LN_2 + ln_gamma(a + 1.0) + ln_gamma(b + 1.0) - ln_gamma(a + b + 2.0); log_val.exp() } diff --git a/src/gauss_kronrod.rs b/src/gauss_kronrod.rs index 925e169..181a0f6 100644 --- a/src/gauss_kronrod.rs +++ b/src/gauss_kronrod.rs @@ -14,6 +14,9 @@ //! compile time. #![allow(clippy::excessive_precision)] +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Detailed result from a single Gauss-Kronrod panel evaluation. /// /// Used internally by the adaptive integrator for error analysis. @@ -42,6 +45,7 @@ pub enum GKPair { impl GKPair { /// Number of Kronrod nodes. + #[inline] pub fn kronrod_order(self) -> usize { match self { Self::G7K15 => 15, @@ -53,6 +57,7 @@ impl GKPair { } /// Number of Gauss nodes embedded in the Kronrod rule. + #[inline] pub fn gauss_order(self) -> usize { match self { Self::G7K15 => 7, @@ -78,7 +83,7 @@ impl GKPair { /// /// // Integrate sin(x) over [0, pi] with error estimate /// let (estimate, error) = gk.integrate( -/// 0.0, std::f64::consts::PI, |x: f64| x.sin() +/// 0.0, core::f64::consts::PI, |x: f64| x.sin() /// ); /// assert!((estimate - 2.0).abs() < 1e-14); /// assert!(error < 1e-10); @@ -110,6 +115,7 @@ impl GaussKronrod { } /// Returns the pair type. 
+ #[inline] pub fn pair(&self) -> GKPair { self.pair } @@ -596,7 +602,7 @@ mod tests { fn integrate_sin() { for pair in [GKPair::G7K15, GKPair::G10K21, GKPair::G15K31] { let gk = GaussKronrod::new(pair); - let (est, err) = gk.integrate(0.0, std::f64::consts::PI, f64::sin); + let (est, err) = gk.integrate(0.0, core::f64::consts::PI, f64::sin); assert!((est - 2.0).abs() < 1e-12, "{pair:?}: estimate = {est}"); assert!(err < 1e-6, "{pair:?}: error estimate = {err}"); } diff --git a/src/gauss_laguerre.rs b/src/gauss_laguerre.rs index 49513c7..df81eb0 100644 --- a/src/gauss_laguerre.rs +++ b/src/gauss_laguerre.rs @@ -12,6 +12,11 @@ use crate::gauss_jacobi::ln_gamma; use crate::golub_welsch::golub_welsch; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Laguerre quadrature rule. /// /// Integrates `f(x) * x^α * e^(-x)` over \[0, ∞) using n points. @@ -59,21 +64,25 @@ impl GaussLaguerre { } /// Returns a reference to the underlying quadrature rule. + #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[0, ∞). + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } diff --git a/src/gauss_legendre.rs b/src/gauss_legendre.rs index 3c2510c..15799da 100644 --- a/src/gauss_legendre.rs +++ b/src/gauss_legendre.rs @@ -13,6 +13,11 @@ use crate::error::QuadratureError; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Legendre quadrature rule. /// /// Exact for polynomials of degree 2n - 1, where n is the number of points. @@ -48,24 +53,45 @@ impl GaussLegendre { } /// Returns a reference to the underlying quadrature rule. 
+ #[inline] pub fn rule(&self) -> &QuadratureRule { &self.rule } /// Returns the number of quadrature points. + #[inline] pub fn order(&self) -> usize { self.rule.order() } /// Returns the nodes on \[-1, 1\]. + #[inline] pub fn nodes(&self) -> &[f64] { &self.rule.nodes } /// Returns the weights. + #[inline] pub fn weights(&self) -> &[f64] { &self.rule.weights } + + /// Create a new n-point Gauss-Legendre rule with parallel node generation. + /// + /// Only beneficial for large n (> ~1000) where the Bogaert asymptotic + /// path is used. For small n, the overhead of thread scheduling exceeds + /// the computation time. + #[cfg(feature = "parallel")] + pub fn new_par(n: usize) -> Result { + if n == 0 { + return Err(QuadratureError::ZeroOrder); + } + + let (nodes, weights) = compute_gl_pair_par(n); + Ok(Self { + rule: QuadratureRule { nodes, weights }, + }) + } } // --------------------------------------------------------------------------- @@ -107,7 +133,7 @@ fn compute_newton(n: usize, m: usize, nodes: &mut [f64], weights: &mut [f64]) { for i in 0..m { // Tricomi initial guess (1-indexed: k = i + 1) let k = (i + 1) as f64; - let theta = std::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); + let theta = core::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); let mut x = theta.cos(); // Newton iteration on P_n(x) using the three-term recurrence @@ -199,7 +225,7 @@ fn bessel_j0_zero(k: usize) -> f64 { BESSEL_J0_ZEROS[k - 1] } else { // McMahon's asymptotic expansion for large zeros of J_0 - let z = std::f64::consts::PI * (k as f64 - 0.25); + let z = core::f64::consts::PI * (k as f64 - 0.25); let r = 1.0 / z; let r2 = r * r; z + r @@ -349,6 +375,74 @@ fn bogaert_pair(n: usize, k: usize) -> (f64, f64) { (theta_refined, weight) } +/// Compute all n nodes and weights in parallel. +/// +/// Uses the same Newton/Bogaert split as the sequential version, but +/// parallelises the per-node computation using rayon. 
+#[cfg(feature = "parallel")] +fn compute_gl_pair_par(n: usize) -> (Vec, Vec) { + use rayon::prelude::*; + + let m = n.div_ceil(2); + + if n <= ASYMPTOTIC_THRESHOLD { + // Newton path: each node is independent after computing the initial guess + let pairs: Vec<(usize, f64, f64)> = (0..m) + .into_par_iter() + .map(|i| { + let k = (i + 1) as f64; + let nf = n as f64; + let theta = core::f64::consts::PI * (4.0 * k - 1.0) / (4.0 * nf + 2.0); + let mut x = theta.cos(); + + for _ in 0..100 { + let (p_n, p_n_deriv) = legendre_eval(n, x); + let dx = -p_n / p_n_deriv; + x += dx; + if dx.abs() < 2.0 * f64::EPSILON * x.abs().max(1.0) { + break; + } + } + + let (_, p_n_deriv) = legendre_eval(n, x); + let w = 2.0 / ((1.0 - x * x) * p_n_deriv * p_n_deriv); + (i, x, w) + }) + .collect(); + + let mut nodes = vec![0.0_f64; n]; + let mut weights = vec![0.0_f64; n]; + for (i, x, w) in pairs { + nodes[n - 1 - i] = x; + weights[n - 1 - i] = w; + nodes[i] = -x; + weights[i] = w; + } + (nodes, weights) + } else { + // Bogaert path: embarrassingly parallel + let pairs: Vec<(usize, f64, f64)> = (0..m) + .into_par_iter() + .map(|i| { + let (theta, weight) = bogaert_pair(n, i + 1); + (i, theta.cos(), weight) + }) + .collect(); + + let mut nodes = vec![0.0_f64; n]; + let mut weights = vec![0.0_f64; n]; + for (i, x, w) in pairs { + nodes[n - 1 - i] = x; + weights[n - 1 - i] = w; + if i != n - 1 - i { + nodes[i] = -x; + weights[i] = w; + } + } + (nodes, weights) + } +} + /// Compute GL nodes/weights via Bogaert's asymptotic expansion. 
fn compute_bogaert(n: usize, m: usize, nodes: &mut [f64], weights: &mut [f64]) { for i in 0..m { @@ -504,7 +598,7 @@ mod tests { #[test] fn integrate_sin_on_zero_to_pi() { let gl = GaussLegendre::new(20).unwrap(); - let result = gl.rule().integrate(0.0, std::f64::consts::PI, f64::sin); + let result = gl.rule().integrate(0.0, core::f64::consts::PI, f64::sin); assert!((result - 2.0).abs() < 1e-13, "got {result}, expected 2.0"); } @@ -552,4 +646,38 @@ mod tests { ); } } + + /// Parallel node generation matches sequential for both Newton and Bogaert paths. + #[cfg(feature = "parallel")] + #[test] + fn new_par_matches_sequential() { + for n in [5, 50, 100, 200, 1000] { + let seq = GaussLegendre::new(n).unwrap(); + let par = GaussLegendre::new_par(n).unwrap(); + for i in 0..n { + let node_err = (seq.nodes()[i] - par.nodes()[i]).abs(); + let weight_err = (seq.weights()[i] - par.weights()[i]).abs(); + assert!(node_err < 1e-15, "n={n}, i={i}: node diff = {node_err}"); + assert!( + weight_err < 1e-15, + "n={n}, i={i}: weight diff = {weight_err}" + ); + } + } + } + + /// Parallel composite integration matches sequential. + #[cfg(feature = "parallel")] + #[test] + fn composite_par_matches_sequential() { + let gl = GaussLegendre::new(5).unwrap(); + let f = |x: f64| x.sin(); + let seq = gl + .rule() + .integrate_composite(0.0, core::f64::consts::PI, 100, f); + let par = gl + .rule() + .integrate_composite_par(0.0, core::f64::consts::PI, 100, f); + assert!((seq - par).abs() < 1e-14, "seq={seq}, par={par}"); + } } diff --git a/src/gauss_lobatto.rs b/src/gauss_lobatto.rs index 0c846d8..c11a929 100644 --- a/src/gauss_lobatto.rs +++ b/src/gauss_lobatto.rs @@ -10,6 +10,11 @@ use crate::error::QuadratureError; use crate::gauss_legendre::legendre_eval; use crate::rule::QuadratureRule; +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// A Gauss-Lobatto quadrature rule on \[-1, 1\]. 
 ///
 /// # Example
@@ -49,21 +54,25 @@
     }
 
     /// Returns a reference to the underlying quadrature rule.
+    #[inline]
     pub fn rule(&self) -> &QuadratureRule {
         &self.rule
     }
 
     /// Returns the number of quadrature points.
+    #[inline]
     pub fn order(&self) -> usize {
         self.rule.order()
     }
 
     /// Returns the nodes on \[-1, 1\].
+    #[inline]
     pub fn nodes(&self) -> &[f64] {
         &self.rule.nodes
     }
 
     /// Returns the weights.
+    #[inline]
     pub fn weights(&self) -> &[f64] {
         &self.rule.weights
     }
@@ -95,7 +104,7 @@ fn compute_lobatto(n: usize) -> (Vec<f64>, Vec<f64>) {
     let m = n - 2; // number of interior nodes
     for k in 0..m {
         // Initial guess: Chebyshev-type
-        let theta = std::f64::consts::PI * (k as f64 + 1.0) / (m as f64 + 1.0);
+        let theta = core::f64::consts::PI * (k as f64 + 1.0) / (m as f64 + 1.0);
         let mut x = -(theta.cos());
 
         // Newton on P'_{n-1}(x) = 0.
diff --git a/src/gauss_radau.rs b/src/gauss_radau.rs
index 5fb8730..b22c2e1 100644
--- a/src/gauss_radau.rs
+++ b/src/gauss_radau.rs
@@ -13,6 +13,9 @@
 use crate::error::QuadratureError;
 use crate::golub_welsch::{golub_welsch, radau_modify};
 use crate::rule::QuadratureRule;
 
+#[cfg(not(feature = "std"))]
+use alloc::{vec, vec::Vec};
+
 /// A Gauss-Radau quadrature rule on \[-1, 1\].
 ///
 /// # Example
@@ -64,21 +67,25 @@
     }
 
     /// Returns a reference to the underlying quadrature rule.
+    #[inline]
     pub fn rule(&self) -> &QuadratureRule {
         &self.rule
     }
 
     /// Returns the number of quadrature points.
+    #[inline]
     pub fn order(&self) -> usize {
         self.rule.order()
     }
 
     /// Returns the nodes on \[-1, 1\].
+    #[inline]
     pub fn nodes(&self) -> &[f64] {
         &self.rule.nodes
     }
 
     /// Returns the weights.
+    #[inline]
     pub fn weights(&self) -> &[f64] {
         &self.rule.weights
     }
diff --git a/src/golub_welsch.rs b/src/golub_welsch.rs
index 2d63e47..d11f0b9 100644
--- a/src/golub_welsch.rs
+++ b/src/golub_welsch.rs
@@ -9,6 +9,11 @@
 //! Reference: Golub & Welsch (1969), "Calculation of Gauss Quadrature Rules",
 //! Mathematics of Computation 23(106), 221-230.
+#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Compute quadrature nodes and weights from three-term recurrence coefficients. /// /// # Arguments diff --git a/src/lib.rs b/src/lib.rs index a5b6ce6..c2948d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg_attr(not(feature = "std"), no_std)] //! # bilby //! //! A high-performance numerical quadrature (integration) library for Rust. @@ -12,6 +13,14 @@ //! - **Precomputed rules** — generate nodes and weights once, integrate many times //! - **No heap allocation on hot paths** — rule construction allocates, integration does not //! - **Separate 1D and N-D APIs** — `Fn(F) -> F` for 1D, `Fn(&[F]) -> F` for N-D +//! - **`no_std` compatible** — works without the standard library (with `alloc`) +//! +//! ## Feature Flags +//! +//! | Feature | Default | Description | +//! |---------|---------|-------------| +//! | `std` | Yes | Enables `std::error::Error` impl and [`cache`] module | +//! | `parallel` | No | Enables rayon-based `_par` methods (implies `std`) | //! //! ## Quick Start //! @@ -32,7 +41,7 @@ //! use bilby::{GaussKronrod, GKPair}; //! //! let gk = GaussKronrod::new(GKPair::G7K15); -//! let (estimate, error) = gk.integrate(0.0, std::f64::consts::PI, f64::sin); +//! let (estimate, error) = gk.integrate(0.0, core::f64::consts::PI, f64::sin); //! assert!((estimate - 2.0).abs() < 1e-14); //! ``` //! @@ -41,7 +50,7 @@ //! ``` //! use bilby::adaptive_integrate; //! -//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, std::f64::consts::PI, 1e-12).unwrap(); +//! let result = adaptive_integrate(|x: f64| x.sin(), 0.0, core::f64::consts::PI, 1e-12).unwrap(); //! assert!((result.value - 2.0).abs() < 1e-12); //! assert!(result.is_converged()); //! ``` @@ -53,10 +62,44 @@ //! //! // Integral of exp(-x^2) over (-inf, inf) = sqrt(pi) //! let result = integrate_infinite(|x: f64| (-x * x).exp(), 1e-10).unwrap(); -//! 
assert!((result.value - std::f64::consts::PI.sqrt()).abs() < 1e-8); +//! assert!((result.value - core::f64::consts::PI.sqrt()).abs() < 1e-8); +//! ``` +//! +//! ## Multi-Dimensional Integration +//! +//! ``` +//! use bilby::cubature::{TensorProductRule, adaptive_cubature}; +//! use bilby::GaussLegendre; +//! +//! // Tensor product: 10-point GL in each of 2 dimensions +//! let gl = GaussLegendre::new(10).unwrap(); +//! let tp = TensorProductRule::isotropic(gl.rule(), 2).unwrap(); +//! let result = tp.rule().integrate_box( +//! &[0.0, 0.0], &[1.0, 1.0], +//! |x| x[0] * x[1], +//! ); +//! assert!((result - 0.25).abs() < 1e-14); //! ``` +//! +//! ## Precomputed Rule Cache +//! +//! Available when the `std` feature is enabled (default): +//! +//! ``` +//! # #[cfg(feature = "std")] { +//! use bilby::cache::GL10; +//! +//! let result = GL10.rule().integrate(0.0, 1.0, |x: f64| x * x); +//! assert!((result - 1.0 / 3.0).abs() < 1e-14); +//! # } +//! ``` + +#[cfg(not(feature = "std"))] +extern crate alloc; pub mod adaptive; +#[cfg(feature = "std")] +pub mod cache; pub mod cauchy_pv; pub mod clenshaw_curtis; pub mod cubature; diff --git a/src/oscillatory.rs b/src/oscillatory.rs index 720dc97..bcb8617 100644 --- a/src/oscillatory.rs +++ b/src/oscillatory.rs @@ -22,6 +22,11 @@ use crate::error::QuadratureError; use crate::gauss_legendre::GaussLegendre; use crate::result::QuadratureResult; +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Type of oscillatory kernel. 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum OscillatoryKernel {
@@ -120,7 +125,7 @@
         // Step 1: Evaluate f at Clenshaw-Curtis nodes on [-1, 1]
         let f_vals: Vec<f64> = (0..=n)
             .map(|k| {
-                let t = (k as f64 * std::f64::consts::PI / n as f64).cos();
+                let t = (k as f64 * core::f64::consts::PI / n as f64).cos();
                 f(mid + half * t)
             })
             .collect();
@@ -220,7 +225,7 @@
 /// c_n are halved).
 fn chebyshev_coefficients(f_vals: &[f64], n: usize) -> Vec<f64> {
     let mut coeffs = vec![0.0; n + 1];
-    let pi_n = std::f64::consts::PI / n as f64;
+    let pi_n = core::f64::consts::PI / n as f64;
 
     for (j, cj) in coeffs.iter_mut().enumerate() {
         let mut sum = 0.0;
@@ -342,7 +347,7 @@
     fn sin_trivial() {
         // ∫₀^π sin(x) dx = 2 (ω=1, f=1)
         let result =
-            integrate_oscillatory_sin(|_| 1.0, 0.0, std::f64::consts::PI, 1.0, 1e-10).unwrap();
+            integrate_oscillatory_sin(|_| 1.0, 0.0, core::f64::consts::PI, 1.0, 1e-10).unwrap();
         assert!((result.value - 2.0).abs() < 1e-6, "value={}", result.value);
     }
@@ -374,7 +379,7 @@
     fn cos_with_linear_f() {
         // ∫₀^π x cos(x) dx = [x sin(x) + cos(x)]₀^π = -1 - 1 = -2
         let result =
-            integrate_oscillatory_cos(|x| x, 0.0, std::f64::consts::PI, 1.0, 1e-8).unwrap();
+            integrate_oscillatory_cos(|x| x, 0.0, core::f64::consts::PI, 1.0, 1e-8).unwrap();
         assert!(
             (result.value - (-2.0)).abs() < 1e-4,
             "value={}",
@@ -389,7 +394,7 @@
         let r = 1.0;
         let i = omega;
         // e^(1+iω) = e * (cos(ω) + i sin(ω))
-        let e = std::f64::consts::E;
+        let e = core::f64::consts::E;
         let re_num = e * omega.cos() - 1.0;
         let im_num = e * omega.sin();
         let denom = r * r + i * i; // 1 + ω²
diff --git a/src/result.rs b/src/result.rs
index b03fc19..db8547c 100644
--- a/src/result.rs
+++ b/src/result.rs
@@ -21,6 +21,7 @@ pub struct QuadratureResult<F: Float> {
 impl<F: Float> QuadratureResult<F> {
     /// Returns `true` if the integration converged within tolerance.
+    #[inline]
     pub fn is_converged(&self) -> bool {
         self.converged
     }
diff --git a/src/rule.rs b/src/rule.rs
index e64d876..fe873d3 100644
--- a/src/rule.rs
+++ b/src/rule.rs
@@ -2,6 +2,9 @@
 
 use num_traits::Float;
 
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+
 /// A precomputed quadrature rule on the reference interval \[-1, 1\].
 ///
 /// Stores nodes and weights that can be reused across many integrations.
@@ -16,6 +19,7 @@ pub struct QuadratureRule {
 impl QuadratureRule {
     /// Returns the number of points in this rule.
+    #[inline]
     pub fn order(&self) -> usize {
         self.nodes.len()
     }
@@ -26,6 +30,7 @@
     /// x = (b - a) / 2 * t + (a + b) / 2
     ///
     /// The result is scaled by (b - a) / 2.
+    #[inline]
     pub fn integrate<F: Float, G>(&self, a: F, b: F, f: G) -> F
     where
         G: Fn(F) -> F,
@@ -61,4 +66,29 @@
         }
         total
     }
+
+    /// Parallel composite integration over \[a, b\] with `n_panels` subintervals.
+    ///
+    /// Identical to [`integrate_composite`](Self::integrate_composite) but evaluates
+    /// panels in parallel using rayon. Requires `F: Send + Sync` and `f: Fn(F) -> F + Sync`.
+    #[cfg(feature = "parallel")]
+    pub fn integrate_composite_par<F: Float, G>(&self, a: F, b: F, n_panels: usize, f: G) -> F
+    where
+        F: Send + Sync,
+        G: Fn(F) -> F + Sync,
+    {
+        use rayon::prelude::*;
+
+        let n = F::from(n_panels).unwrap();
+        let panel_width = (b - a) / n;
+
+        (0..n_panels)
+            .into_par_iter()
+            .map(|i| {
+                let panel_a = a + F::from(i).unwrap() * panel_width;
+                let panel_b = panel_a + panel_width;
+                self.integrate(panel_a, panel_b, &f)
+            })
+            .reduce(F::zero, |a, b| a + b)
+    }
 }
diff --git a/src/tanh_sinh.rs b/src/tanh_sinh.rs
index 3962cbc..66433b7 100644
--- a/src/tanh_sinh.rs
+++ b/src/tanh_sinh.rs
@@ -20,6 +20,9 @@
 use crate::error::QuadratureError;
 use crate::result::QuadratureResult;
 
+#[cfg(not(feature = "std"))]
+use num_traits::Float as _;
+
 /// Builder for tanh-sinh (double-exponential) quadrature.
 ///
 /// # Example
@@ -93,7 +96,7 @@
         let mid = 0.5 * (a + b);
         let half = 0.5 * (b - a);
 
-        let pi_2 = std::f64::consts::FRAC_PI_2;
+        let pi_2 = core::f64::consts::FRAC_PI_2;
 
         let mut total_evals = 0usize;
         let mut prev_estimate: f64 = 0.0;
@@ -305,7 +308,7 @@
         // ∫₀¹ 1/√(x(1-x)) dx = π
         let result = tanh_sinh_integrate(|x| 1.0 / (x * (1.0 - x)).sqrt(), 0.0, 1.0, 1e-8).unwrap();
         assert!(
-            (result.value - std::f64::consts::PI).abs() < 1e-6,
+            (result.value - core::f64::consts::PI).abs() < 1e-6,
             "value={}",
             result.value
         );
@@ -316,7 +319,7 @@
         // ∫₋₁¹ 1/√(1-x²) dx = π
         let result = tanh_sinh_integrate(|x| 1.0 / (1.0 - x * x).sqrt(), -1.0, 1.0, 1e-8).unwrap();
         assert!(
-            (result.value - std::f64::consts::PI).abs() < 1e-6,
+            (result.value - core::f64::consts::PI).abs() < 1e-6,
             "value={}",
             result.value
         );
@@ -325,7 +328,7 @@
     #[test]
     fn sin_integral() {
         // ∫₀^π sin(x) dx = 2 (smooth, sanity check)
-        let result = tanh_sinh_integrate(|x| x.sin(), 0.0, std::f64::consts::PI, 1e-10).unwrap();
+        let result = tanh_sinh_integrate(|x| x.sin(), 0.0, core::f64::consts::PI, 1e-10).unwrap();
         assert!((result.value - 2.0).abs() < 1e-8, "value={}", result.value);
     }
diff --git a/src/transforms.rs b/src/transforms.rs
index d7ea596..c3f5f9b 100644
--- a/src/transforms.rs
+++ b/src/transforms.rs
@@ -148,7 +148,7 @@
 /// 
 /// // Integral of exp(-x^2) over (-inf, inf) = sqrt(pi)
 /// let r = integrate_infinite(|x: f64| (-x * x).exp(), 1e-10).unwrap();
-/// assert!((r.value - std::f64::consts::PI.sqrt()).abs() < 1e-8);
+/// assert!((r.value - core::f64::consts::PI.sqrt()).abs() < 1e-8);
 /// ```
 pub fn integrate_infinite<G>(f: G, tol: f64) -> Result<QuadratureResult<f64>, QuadratureError>
 where
@@ -177,7 +177,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use std::f64::consts::PI;
+    use core::f64::consts::PI;
 
     /// exp(-x) over [0, inf) = 1.
#[test] diff --git a/src/weighted.rs b/src/weighted.rs index cfe81b5..4cd1a9a 100644 --- a/src/weighted.rs +++ b/src/weighted.rs @@ -10,7 +10,7 @@ //! //! // ∫₋₁¹ 1/√(1-x²) dx = π (Chebyshev Type I weight) //! let result = weighted_integrate(|_| 1.0, WeightFunction::ChebyshevI, 20).unwrap(); -//! assert!((result - std::f64::consts::PI).abs() < 1e-12); +//! assert!((result - core::f64::consts::PI).abs() < 1e-12); //! ``` use crate::error::QuadratureError; @@ -19,6 +19,9 @@ use crate::gauss_hermite::GaussHermite; use crate::gauss_jacobi::GaussJacobi; use crate::gauss_laguerre::GaussLaguerre; +#[cfg(not(feature = "std"))] +use num_traits::Float as _; + /// Known weight functions for product integration. #[derive(Debug, Clone)] pub enum WeightFunction { @@ -46,7 +49,7 @@ pub enum WeightFunction { /// let wi = WeightedIntegrator::new(WeightFunction::Hermite, 20).unwrap(); /// // ∫₋∞^∞ 1 · e^(-x²) dx = √π /// let result = wi.integrate(|_| 1.0); -/// assert!((result - std::f64::consts::PI.sqrt()).abs() < 1e-12); +/// assert!((result - core::f64::consts::PI.sqrt()).abs() < 1e-12); /// ``` #[derive(Debug, Clone)] pub struct WeightedIntegrator { @@ -208,7 +211,7 @@ mod tests { ) .unwrap(); assert!( - (result - std::f64::consts::FRAC_PI_2).abs() < 1e-10, + (result - core::f64::consts::FRAC_PI_2).abs() < 1e-10, "result={result}" ); } @@ -234,7 +237,7 @@ mod tests { // ∫₋∞^∞ e^(-x²) dx = √π let result = weighted_integrate(|_| 1.0, WeightFunction::Hermite, 10).unwrap(); assert!( - (result - std::f64::consts::PI.sqrt()).abs() < 1e-10, + (result - core::f64::consts::PI.sqrt()).abs() < 1e-10, "result={result}" ); } @@ -244,7 +247,7 @@ mod tests { // ∫₋₁¹ 1/√(1-x²) dx = π let result = weighted_integrate(|_| 1.0, WeightFunction::ChebyshevI, 20).unwrap(); assert!( - (result - std::f64::consts::PI).abs() < 1e-12, + (result - core::f64::consts::PI).abs() < 1e-12, "result={result}" ); } @@ -254,7 +257,7 @@ mod tests { // ∫₋₁¹ √(1-x²) dx = π/2 let result = weighted_integrate(|_| 1.0, 
WeightFunction::ChebyshevII, 20).unwrap(); assert!( - (result - std::f64::consts::FRAC_PI_2).abs() < 1e-12, + (result - core::f64::consts::FRAC_PI_2).abs() < 1e-12, "result={result}" ); }