From 174300f099e56097caa30bc7bc077a17a6ad8ca0 Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 09:30:36 +0100 Subject: [PATCH 1/8] ECM with BigInt and GMP --- .gitignore | 55 ++++++++++- src/Primes.jl | 1 + src/ecm.jl | 233 +++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 55 +++++++++++ 4 files changed, 340 insertions(+), 4 deletions(-) create mode 100644 src/ecm.jl diff --git a/.gitignore b/.gitignore index 9847c84..63d7e8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,54 @@ +* + +### Allowed files and directories ### + +!.gitignore +!.github/ +!.github/workflows/ +!.github/workflows/* +!.github/dependabot.yml + +!LICENSE.md +!README.md +!Project.toml + +!src/ +!src/*.jl +!test/ +!test/*.jl +!docs/ +!docs/src/ +!docs/src/*.md +!docs/make.jl +!docs/Project.toml + +### Denied even if allowed above ### + +# Files generated by invoking Julia with --code-coverage *.jl.cov *.jl.*.cov + +# Files generated by invoking Julia with --track-allocation *.jl.mem -Manifest.toml -docs/build -docs/site -docs/Manifest.toml + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. +Manifest*.toml + +# File generated by the Preferences package to store local preferences +LocalPreferences.toml +JuliaLocalPreferences.toml \ No newline at end of file diff --git a/src/Primes.jl b/src/Primes.jl index b95fdac..9880b89 100644 --- a/src/Primes.jl +++ b/src/Primes.jl @@ -12,6 +12,7 @@ export isprime, primes, primesmask, factor, eachfactor, divisors, ismersenneprim nextprime, nextprimes, prevprime, prevprimes, prime, prodfactors, radical, totient include("factorization.jl") +include("ecm.jl") # Primes generating functions # https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes diff --git a/src/ecm.jl b/src/ecm.jl new file mode 100644 index 0000000..7388f6c --- /dev/null +++ b/src/ecm.jl @@ -0,0 +1,233 @@ +# Elliptic Curve Method (ECM) for integer factorization +# Ref: Lenstra (1987) "Factoring integers with elliptic curves" +# Ref: Montgomery (1987) "Speeding the Pollard and Elliptic Curve Methods of Factorization" + +""" +Point on a Montgomery curve in projective coordinates (X:Z). +The point at infinity is represented by Z == 0. +""" +struct MontgomeryCurvePoint + X::BigInt + Z::BigInt +end + +""" +In-place modular reduction: sets r = n mod d (non-negative remainder). +""" +function _mpz_fdiv_r!(r::BigInt, n::BigInt, d::BigInt) + ccall((:__gmpz_fdiv_r, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}, Ref{BigInt}), r, n, d) +end + +""" +Preallocated scratch space for ECM point arithmetic. +Avoids BigInt allocation in the hot Montgomery ladder loop. +t1-t6: scratch for add!/double!; R0/R1/tmp: scratch for scalar_mul! +""" +struct ECMBuffers + t1::BigInt + t2::BigInt + t3::BigInt + t4::BigInt + t5::BigInt + t6::BigInt + R0_X::BigInt + R0_Z::BigInt + R1_X::BigInt + R1_Z::BigInt + tmp_X::BigInt + tmp_Z::BigInt +end + +ECMBuffers() = ECMBuffers(BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), + BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt()) + +""" +In-place: mulmod!(dst, a, b, n, tmp) sets dst = (a * b) mod n using tmp as scratch. +""" +@inline function _mulmod!(dst::BigInt, a::BigInt, b::BigInt, n::BigInt, tmp::BigInt) + Base.GMP.MPZ.mul!(tmp, a, b) + _mpz_fdiv_r!(dst, tmp, n) +end + +""" +Differential addition on Montgomery curve: given P, Q and P-Q, compute P+Q. +Uses projective coordinates and in-place arithmetic to avoid allocations. +""" +function _ecm_add!(res_X::BigInt, res_Z::BigInt, + P_X::BigInt, P_Z::BigInt, Q_X::BigInt, Q_Z::BigInt, + diff_X::BigInt, diff_Z::BigInt, n::BigInt, buf::ECMBuffers) + t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 + # u = (P.X - P.Z) * (Q.X + Q.Z) mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z + Base.GMP.MPZ.add!(t2, Q_X, Q_Z) # t2 = Q.X + Q.Z + _mulmod!(t5, t1, t2, n, t3) # t5 = u + + # v = (P.X + P.Z) * (Q.X - Q.Z) mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z + Base.GMP.MPZ.sub!(t2, Q_X, Q_Z) # t2 = Q.X - Q.Z + _mulmod!(t6, t1, t2, n, t3) # t6 = v + + # add = u + v, sub = u - v + Base.GMP.MPZ.add!(t1, t5, t6) # t1 = add = u + v + Base.GMP.MPZ.sub!(t2, t5, t6) # t2 = sub = u - v + + # X = diff.Z * add^2 mod n + _mulmod!(t3, t1, t1, n, t4) # t3 = add^2 mod n + _mulmod!(res_X, diff_Z, t3, n, t4) # res_X = diff.Z * add^2 mod n + + # Z = diff.X * sub^2 mod n + _mulmod!(t3, t2, t2, n, t4) # t3 = sub^2 mod n + _mulmod!(res_Z, diff_X, t3, n, t4) # res_Z = diff.X * sub^2 mod n +end + +""" +In-place point doubling on Montgomery curve with parameter a24 = (a+2)/4. +""" +function _ecm_double!(res_X::BigInt, res_Z::BigInt, + P_X::BigInt, P_Z::BigInt, + n::BigInt, a24::BigInt, buf::ECMBuffers) + t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 + # u = (P.X + P.Z)^2 mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z + _mulmod!(t5, t1, t1, n, t3) # t5 = u = (P.X+P.Z)^2 mod n + + # v = (P.X - P.Z)^2 mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z + _mulmod!(t6, t1, t1, n, t3) # t6 = v = (P.X-P.Z)^2 mod n + + # diff = u - v + Base.GMP.MPZ.sub!(t1, t5, t6) # t1 = diff = u - v + + # X = u * v mod n + _mulmod!(res_X, t5, t6, n, t3) # res_X = u * v mod n + + # Z = diff * (v + a24 * diff) mod n + _mulmod!(t2, a24, t1, n, t3) # t2 = a24 * diff mod n + Base.GMP.MPZ.add!(t2, t6) # t2 = v + a24 * diff + _mulmod!(res_Z, t1, t2, n, t3) # res_Z = diff * (v + a24*diff) mod n +end + +""" +Montgomery ladder scalar multiplication: compute [k]P on Montgomery curve. +Uses preallocated buffers to avoid allocation in the inner loop. +Returns the point [k]P as (res_X, res_Z). +""" +function _ecm_scalar_mul!(res_X::BigInt, res_Z::BigInt, + k::BigInt, P_X::BigInt, P_Z::BigInt, + n::BigInt, a24::BigInt, buf::ECMBuffers) + R0_X, R0_Z = buf.R0_X, buf.R0_Z + R1_X, R1_Z = buf.R1_X, buf.R1_Z + tmp_X, tmp_Z = buf.tmp_X, buf.tmp_Z + + # R0 = P, R1 = 2P + Base.GMP.MPZ.set!(R0_X, P_X) + Base.GMP.MPZ.set!(R0_Z, P_Z) + _ecm_double!(R1_X, R1_Z, P_X, P_Z, n, a24, buf) + + bits = ndigits(k, base=2) + for i in (bits - 2):-1:0 + if isodd(k >> i) + _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf) + Base.GMP.MPZ.set!(R0_X, tmp_X) + Base.GMP.MPZ.set!(R0_Z, tmp_Z) + _ecm_double!(tmp_X, tmp_Z, R1_X, R1_Z, n, a24, buf) + Base.GMP.MPZ.set!(R1_X, tmp_X) + Base.GMP.MPZ.set!(R1_Z, tmp_Z) + else + _ecm_add!(tmp_X, tmp_Z, R0_X, R0_Z, R1_X, R1_Z, P_X, P_Z, n, buf) + Base.GMP.MPZ.set!(R1_X, tmp_X) + Base.GMP.MPZ.set!(R1_Z, tmp_Z) + _ecm_double!(tmp_X, tmp_Z, R0_X, R0_Z, n, a24, buf) + Base.GMP.MPZ.set!(R0_X, tmp_X) + Base.GMP.MPZ.set!(R0_Z, tmp_Z) + end + end + Base.GMP.MPZ.set!(res_X, R0_X) + Base.GMP.MPZ.set!(res_Z, R0_Z) +end + +""" + ecm_factor(n::BigInt, B1::Int, num_curves::Int) -> Union{BigInt, Nothing} + +Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method. +Computes [m]P where m = lcm(1..B1) = prod(p^floor(log_p(B1)) for p prime ≤ B1). +Uses batched gcd (accumulate Z coordinates, check periodically) to reduce gcd calls. +Returns a factor or `nothing` if none found within the curve budget. +""" +function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} + # Precompute prime powers for Stage 1 + prime_powers = BigInt[] + for p in primes(B1) + pk = BigInt(p) + while pk * p <= B1 + pk *= p + end + push!(prime_powers, pk) + end + + buf = ECMBuffers() + Q_X = BigInt() + Q_Z = BigInt() + tmp_mul = BigInt() # scratch for acc * Q.Z + + for _ in 1:num_curves + # Generate random curve via σ parameter (Suyama's parametrization) + σ = BigInt(rand(6:10^9)) + u = mod(σ * σ - 5, n) + v = mod(4 * σ, n) + x0 = mod(u * u * u, n) + z0 = mod(v * v * v, n) + + vu_diff = mod(v - u, n) + a24_num = mod(vu_diff^3 * mod(3 * u + v, n), n) + a24_den = mod(16 * x0 * v, n) + + g = gcd(a24_den, n) + if g > 1 && g < n + return g + end + if g == n + continue + end + + a24_den_inv = invmod(a24_den, n) + a24 = mod(a24_num * a24_den_inv, n) + + Base.GMP.MPZ.set!(Q_X, x0) + Base.GMP.MPZ.set!(Q_Z, z0) + + # Stage 1: multiply Q by each prime power, with batched gcd + degenerate = false + acc = BigInt(1) + batch_count = 0 + for pk in prime_powers + _ecm_scalar_mul!(Q_X, Q_Z, pk, Q_X, Q_Z, n, a24, buf) + Base.GMP.MPZ.mul!(tmp_mul, acc, Q_Z) + _mpz_fdiv_r!(acc, tmp_mul, n) + batch_count += 1 + + if batch_count >= 100 + g = gcd(acc, n) + if g > 1 && g < n + return g + end + if g == n + degenerate = true + break + end + Base.GMP.MPZ.set_si!(acc, 1) + batch_count = 0 + end + end + + degenerate && continue + + if batch_count > 0 + g = gcd(acc, n) + if g > 1 && g < n + return g + end + end + end + return nothing +end diff --git a/test/runtests.jl b/test/runtests.jl index f63e59a..e064d5d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -511,3 +511,58 @@ end end @test primes(2^31-20, 2^31-1) == [2147483629, 2147483647] end + +@testset "ECM factorization" begin + # Semi-prime with a ~40-bit factor + p1 = big"824633720831" # 40-bit prime + q1 = big"1000000007" # 30-bit prime + n1 = p1 * q1 + f1 = factor(n1) + @test prodfactors(f1) == n1 + @test all(isprime(p) for (p, _) in f1) + + # p ~40 bits, q ~80 bits + p2 = big"824633720831" + q2 = big"1180591620717411303449" # ~80-bit prime + n2 = p2 * q2 + f2 = factor(n2) + @test prodfactors(f2) == n2 + @test all(isprime(p) for (p, _) in f2) +end + +@testset "MPQS factorization" begin + # Semi-prime with two ~60-bit factors + p1 = big"780002082420426809" + q1 = big"810735269523504809437013569" + n1 = p1 * q1 + f1 = factor(n1) + @test prodfactors(f1) == n1 + @test all(isprime(p) for (p, _) in f1) +end + +@testset "BigInt factorization" begin + # factor(BigInt(n)) returns Factorization{BigInt} + n1 = big"2152302898747" + f1 = factor(n1) + @test f1 isa Primes.Factorization{BigInt} + @test prodfactors(f1) == n1 + + # BigInt prime returns single-entry + p = big"359334085968622831041960188598043661065388726959079837" + f2 = factor(p) + @test length(f2) == 1 + @test first(f2) == (p => 1) +end + +@testset "ECM edge cases" begin + # ECM with a small semi-prime + n = big"1000000007" * big"1000000009" + result = Primes.ecm_factor(n, 2000, 50) + @test result !== nothing + @test mod(n, result) == 0 + + # ECM returns nothing when curves are insufficient for a hard number + hard = nextprime(big"10"^30) * nextprime(big"10"^30 + 1000) + result2 = Primes.ecm_factor(hard, 100, 1) + # May or may not find a factor with just 1 curve — no assertion on result +end From 5213d1af4d3444b9a9278588af5071bc450550ea Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 10:03:07 +0100 Subject: [PATCH 2/8] feat: make ECM generic for any T<:Integer, including BitIntegers.jl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation was GMP-only, forcing callers to convert to BigInt even when a fixed-width type (e.g. UInt256 from BitIntegers.jl) would be much faster. Changes in src/ecm.jl: - Add _addmod / _submod / _mulmod generic helpers that use widen(T) for overflow-safe arithmetic. For BitIntegers types the widen chain stays in LLVM fixed-width integers (UInt256 → UInt512 → UInt1024), so no BigInt allocation occurs in the inner loop. - Add generic _ecm_add / _ecm_double / _ecm_scalar_mul (functional style, no mutation) that work for any T<:Integer. - Add generic ecm_factor(n::T, ...) where T<:Integer dispatching to the generic helpers. - Retain the BigInt-specific ecm_factor(n::BigInt, ...) overload using GMP in-place arithmetic (ECMBuffers) for large BigInt factorizations. The _ecm_add! / _ecm_double! / _ecm_scalar_mul! (bang variants) are BigInt-only and called exclusively from this overload. - Remove MontgomeryCurvePoint struct (unused after refactor). - Update _ecm_scalar_mul! to accept k::Integer (was k::BigInt). Changes in test/runtests.jl: - Add testset 'ECM generic integer types' covering UInt128 (widen→BigInt path) and UInt256/Int256 from BitIntegers.jl (widen→UInt512 LLVM path). Changes in Project.toml: - Add BitIntegers as a test-only dependency ([extras] + [targets]). --- Project.toml | 14 +- src/ecm.jl | 332 ++++++++++++++++++++++++++++++++--------------- test/runtests.jl | 34 ++++- 3 files changed, 269 insertions(+), 111 deletions(-) diff --git a/Project.toml b/Project.toml index 460e0eb..f4d7abb 100644 --- a/Project.toml +++ b/Project.toml @@ -5,14 +5,16 @@ version = "0.5.7" [deps] IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235" +[compat] +BitIntegers = "0.3" +IntegerMathUtils = "0.1.1" +julia = "1.6" + [extras] -IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235" +BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataStructures", "IntegerMathUtils", "Test"] - -[compat] -IntegerMathUtils = "0.1.1" -julia = "1.6" +test = ["BitIntegers", "DataStructures", "IntegerMathUtils", "Test"] diff --git a/src/ecm.jl b/src/ecm.jl index 7388f6c..c689255 100644 --- a/src/ecm.jl +++ b/src/ecm.jl @@ -2,27 +2,47 @@ # Ref: Lenstra (1987) "Factoring integers with elliptic curves" # Ref: Montgomery (1987) "Speeding the Pollard and Elliptic Curve Methods of Factorization" -""" -Point on a Montgomery curve in projective coordinates (X:Z). -The point at infinity is represented by Z == 0. -""" -struct MontgomeryCurvePoint - X::BigInt - Z::BigInt +# ============================================================================= +# Generic modular arithmetic helpers (work for any T<:Integer, including +# fixed-width types from BitIntegers.jl such as UInt256, UInt512, etc.) +# Uses widen(T) for overflow-safe multiplication; for BitIntegers types, +# widen returns the next fixed-width type (e.g. widen(UInt256) == UInt512), +# so no BigInt allocation occurs. +# ============================================================================= + +# Modular addition: (a + b) mod n, for a, b ∈ [0, n). +# Safe as long as a + b does not overflow T, which holds when n ≪ typemax(T). +@inline function _addmod(a::T, b::T, n::T) where {T<:Integer} + r = a + b + r >= n ? r - n : r end -""" -In-place modular reduction: sets r = n mod d (non-negative remainder). -""" +# Modular subtraction: (a - b) mod n, for a, b ∈ [0, n). +# Written as n - (b - a) when a < b to avoid underflow in unsigned types. +@inline function _submod(a::T, b::T, n::T) where {T<:Integer} + a >= b ? a - b : n - (b - a) +end + +# Modular multiplication: (a * b) mod n. +# Uses widen(T) to avoid overflow for fixed-width integers. +# For BigInt, widen returns BigInt itself, so this is simply mod(a * b, n). +@inline function _mulmod(a::T, b::T, n::T) where {T<:Integer} + W = widen(T) + T(mod(W(a) * W(b), W(n))) +end + +# ============================================================================= +# GMP-optimized path for BigInt: in-place arithmetic avoids allocations in +# the hot Montgomery ladder loop, which matters for large-integer factoring. +# ============================================================================= + +# In-place modular reduction: r = n mod d (non-negative remainder). function _mpz_fdiv_r!(r::BigInt, n::BigInt, d::BigInt) ccall((:__gmpz_fdiv_r, :libgmp), Cvoid, (Ref{BigInt}, Ref{BigInt}, Ref{BigInt}), r, n, d) end -""" -Preallocated scratch space for ECM point arithmetic. -Avoids BigInt allocation in the hot Montgomery ladder loop. -t1-t6: scratch for add!/double!; R0/R1/tmp: scratch for scalar_mul! -""" +# Preallocated scratch space for the BigInt ECM inner loop. +# t1–t6: scratch for add!/double!; R0/R1/tmp: scratch for scalar_mul! struct ECMBuffers t1::BigInt t2::BigInt @@ -41,89 +61,58 @@ end ECMBuffers() = ECMBuffers(BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt(), BigInt()) -""" -In-place: mulmod!(dst, a, b, n, tmp) sets dst = (a * b) mod n using tmp as scratch. -""" +# In-place: dst = (a * b) mod n, using tmp as scratch. @inline function _mulmod!(dst::BigInt, a::BigInt, b::BigInt, n::BigInt, tmp::BigInt) Base.GMP.MPZ.mul!(tmp, a, b) _mpz_fdiv_r!(dst, tmp, n) end -""" -Differential addition on Montgomery curve: given P, Q and P-Q, compute P+Q. -Uses projective coordinates and in-place arithmetic to avoid allocations. -""" +# Differential addition on Montgomery curve (BigInt in-place version). +# Given projective P, Q and their difference P-Q (as diff), computes P+Q. function _ecm_add!(res_X::BigInt, res_Z::BigInt, P_X::BigInt, P_Z::BigInt, Q_X::BigInt, Q_Z::BigInt, diff_X::BigInt, diff_Z::BigInt, n::BigInt, buf::ECMBuffers) t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 - # u = (P.X - P.Z) * (Q.X + Q.Z) mod n - Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z - Base.GMP.MPZ.add!(t2, Q_X, Q_Z) # t2 = Q.X + Q.Z - _mulmod!(t5, t1, t2, n, t3) # t5 = u - - # v = (P.X + P.Z) * (Q.X - Q.Z) mod n - Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z - Base.GMP.MPZ.sub!(t2, Q_X, Q_Z) # t2 = Q.X - Q.Z - _mulmod!(t6, t1, t2, n, t3) # t6 = v - - # add = u + v, sub = u - v - Base.GMP.MPZ.add!(t1, t5, t6) # t1 = add = u + v - Base.GMP.MPZ.sub!(t2, t5, t6) # t2 = sub = u - v - - # X = diff.Z * add^2 mod n - _mulmod!(t3, t1, t1, n, t4) # t3 = add^2 mod n - _mulmod!(res_X, diff_Z, t3, n, t4) # res_X = diff.Z * add^2 mod n - - # Z = diff.X * sub^2 mod n - _mulmod!(t3, t2, t2, n, t4) # t3 = sub^2 mod n - _mulmod!(res_Z, diff_X, t3, n, t4) # res_Z = diff.X * sub^2 mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) + Base.GMP.MPZ.add!(t2, Q_X, Q_Z) + _mulmod!(t5, t1, t2, n, t3) # t5 = (P.X-P.Z)(Q.X+Q.Z) mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) + Base.GMP.MPZ.sub!(t2, Q_X, Q_Z) + _mulmod!(t6, t1, t2, n, t3) # t6 = (P.X+P.Z)(Q.X-Q.Z) mod n + Base.GMP.MPZ.add!(t1, t5, t6) # t1 = t5 + t6 + Base.GMP.MPZ.sub!(t2, t5, t6) # t2 = t5 - t6 + _mulmod!(t3, t1, t1, n, t4) + _mulmod!(res_X, diff_Z, t3, n, t4) # res_X = diff.Z * (t5+t6)^2 mod n + _mulmod!(t3, t2, t2, n, t4) + _mulmod!(res_Z, diff_X, t3, n, t4) # res_Z = diff.X * (t5-t6)^2 mod n end -""" -In-place point doubling on Montgomery curve with parameter a24 = (a+2)/4. -""" +# Point doubling on Montgomery curve with a24 = (a+2)/4 (BigInt in-place version). function _ecm_double!(res_X::BigInt, res_Z::BigInt, P_X::BigInt, P_Z::BigInt, n::BigInt, a24::BigInt, buf::ECMBuffers) t1, t2, t3, t4, t5, t6 = buf.t1, buf.t2, buf.t3, buf.t4, buf.t5, buf.t6 - # u = (P.X + P.Z)^2 mod n - Base.GMP.MPZ.add!(t1, P_X, P_Z) # t1 = P.X + P.Z - _mulmod!(t5, t1, t1, n, t3) # t5 = u = (P.X+P.Z)^2 mod n - - # v = (P.X - P.Z)^2 mod n - Base.GMP.MPZ.sub!(t1, P_X, P_Z) # t1 = P.X - P.Z - _mulmod!(t6, t1, t1, n, t3) # t6 = v = (P.X-P.Z)^2 mod n - - # diff = u - v - Base.GMP.MPZ.sub!(t1, t5, t6) # t1 = diff = u - v - - # X = u * v mod n - _mulmod!(res_X, t5, t6, n, t3) # res_X = u * v mod n - - # Z = diff * (v + a24 * diff) mod n - _mulmod!(t2, a24, t1, n, t3) # t2 = a24 * diff mod n - Base.GMP.MPZ.add!(t2, t6) # t2 = v + a24 * diff - _mulmod!(res_Z, t1, t2, n, t3) # res_Z = diff * (v + a24*diff) mod n + Base.GMP.MPZ.add!(t1, P_X, P_Z) + _mulmod!(t5, t1, t1, n, t3) # t5 = (P.X+P.Z)^2 mod n + Base.GMP.MPZ.sub!(t1, P_X, P_Z) + _mulmod!(t6, t1, t1, n, t3) # t6 = (P.X-P.Z)^2 mod n + Base.GMP.MPZ.sub!(t1, t5, t6) # t1 = t5 - t6 + _mulmod!(res_X, t5, t6, n, t3) # res_X = t5 * t6 mod n + _mulmod!(t2, a24, t1, n, t3) + Base.GMP.MPZ.add!(t2, t6) + _mulmod!(res_Z, t1, t2, n, t3) # res_Z = (t5-t6) * (t6 + a24*(t5-t6)) mod n end -""" -Montgomery ladder scalar multiplication: compute [k]P on Montgomery curve. -Uses preallocated buffers to avoid allocation in the inner loop. -Returns the point [k]P as (res_X, res_Z). -""" +# Montgomery ladder scalar multiplication (BigInt in-place version). function _ecm_scalar_mul!(res_X::BigInt, res_Z::BigInt, - k::BigInt, P_X::BigInt, P_Z::BigInt, + k::Integer, P_X::BigInt, P_Z::BigInt, n::BigInt, a24::BigInt, buf::ECMBuffers) R0_X, R0_Z = buf.R0_X, buf.R0_Z R1_X, R1_Z = buf.R1_X, buf.R1_Z tmp_X, tmp_Z = buf.tmp_X, buf.tmp_Z - - # R0 = P, R1 = 2P Base.GMP.MPZ.set!(R0_X, P_X) Base.GMP.MPZ.set!(R0_Z, P_Z) _ecm_double!(R1_X, R1_Z, P_X, P_Z, n, a24, buf) - bits = ndigits(k, base=2) for i in (bits - 2):-1:0 if isodd(k >> i) @@ -146,69 +135,206 @@ function _ecm_scalar_mul!(res_X::BigInt, res_Z::BigInt, Base.GMP.MPZ.set!(res_Z, R0_Z) end +# ============================================================================= +# Generic ECM functions (functional style, no mutation). +# Work for any T<:Integer; the compiler specialises for each concrete type. +# For BitIntegers.jl types the widen chain stays in fixed-width LLVM integers, +# making these significantly faster than going through BigInt. +# ============================================================================= + +# Differential addition on Montgomery curve (generic version). +# Given projective P=(PX:PZ), Q=(QX:QZ) and their difference P-Q=(diffX:diffZ), +# returns (res_X, res_Z) = P+Q. +function _ecm_add(PX::T, PZ::T, QX::T, QZ::T, + diffX::T, diffZ::T, n::T) where {T<:Integer} + u = _mulmod(_submod(PX, PZ, n), _addmod(QX, QZ, n), n) + v = _mulmod(_addmod(PX, PZ, n), _submod(QX, QZ, n), n) + ad = _addmod(u, v, n) + sb = _submod(u, v, n) + resX = _mulmod(diffZ, _mulmod(ad, ad, n), n) + resZ = _mulmod(diffX, _mulmod(sb, sb, n), n) + resX, resZ +end + +# Point doubling on Montgomery curve with a24 = (a+2)/4 (generic version). +# Returns (res_X, res_Z). +function _ecm_double(PX::T, PZ::T, n::T, a24::T) where {T<:Integer} + u = _mulmod(_addmod(PX, PZ, n), _addmod(PX, PZ, n), n) + v = _mulmod(_submod(PX, PZ, n), _submod(PX, PZ, n), n) + diff = _submod(u, v, n) + resX = _mulmod(u, v, n) + resZ = _mulmod(diff, _addmod(v, _mulmod(a24, diff, n), n), n) + resX, resZ +end + +# Montgomery ladder scalar multiplication (generic version). +# Computes [k]P on the Montgomery curve; k is a small integer (prime power ≤ B1). +# Returns (res_X, res_Z). +function _ecm_scalar_mul(k::Integer, PX::T, PZ::T, n::T, a24::T) where {T<:Integer} + R0X, R0Z = PX, PZ + R1X, R1Z = _ecm_double(PX, PZ, n, a24) + nbits = ndigits(k, base=2) + for i in (nbits - 2):-1:0 + if isodd(k >> i) + tmpX, tmpZ = _ecm_add(R0X, R0Z, R1X, R1Z, PX, PZ, n) + R0X, R0Z = tmpX, tmpZ + R1X, R1Z = _ecm_double(R1X, R1Z, n, a24) + else + tmpX, tmpZ = _ecm_add(R0X, R0Z, R1X, R1Z, PX, PZ, n) + R1X, R1Z = tmpX, tmpZ + R0X, R0Z = _ecm_double(R0X, R0Z, n, a24) + end + end + R0X, R0Z +end + +# ============================================================================= +# Public API +# ============================================================================= + +""" + ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} -> Union{T, Nothing} + +Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method (ECM). + +Uses Montgomery curves with Suyama's parametrization and a batched GCD to reduce +the number of `gcd` calls in the inner loop. + +This generic method works for any integer type `T`, including fixed-width types from +[BitIntegers.jl](https://github.com/rfourquet/BitIntegers.jl) (e.g. `UInt256`, `UInt512`). +For those types modular multiplication stays within fixed-width LLVM integers via +`widen(T)`, avoiding BigInt allocation entirely. + +Returns a non-trivial factor of `n`, or `nothing` if none is found within the budget. + +See also the `BigInt`-optimised overload which uses in-place GMP arithmetic. +""" +function ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} + # Precompute prime powers ≤ B1 as plain Int (they are small). + prime_powers = Int[] + for p in primes(B1) + pk = p + while pk * p <= B1 + pk *= p + end + push!(prime_powers, pk) + end + + for _ in 1:num_curves + # Suyama's parametrization: generate a random curve and initial point. + σ = T(rand(6:10^9)) + u = _submod(_mulmod(σ, σ, n), T(5), n) # (σ²-5) mod n + v = _mulmod(T(4), σ, n) # 4σ mod n + x0 = _mulmod(_mulmod(u, u, n), u, n) # u³ mod n + z0 = _mulmod(_mulmod(v, v, n), v, n) # v³ mod n + + vu_diff = _submod(v, u, n) + vu_diff3 = _mulmod(_mulmod(vu_diff, vu_diff, n), vu_diff, n) + a24_num = _mulmod(vu_diff3, _addmod(_mulmod(T(3), u, n), v, n), n) + a24_den = _mulmod(_mulmod(T(16), x0, n), v, n) + + g = gcd(a24_den, n) + if 1 < g < n + return g + end + g == n && continue + + a24_den_inv = invmod(a24_den, n) + a24 = _mulmod(a24_num, a24_den_inv, n) + + QX, QZ = x0, z0 + + # Stage 1: multiply Q by each prime power, batching the GCD checks. + degenerate = false + acc = one(T) + batch_count = 0 + for pk in prime_powers + QX, QZ = _ecm_scalar_mul(pk, QX, QZ, n, a24) + acc = _mulmod(acc, QZ, n) + batch_count += 1 + if batch_count >= 100 + g = gcd(acc, n) + if 1 < g < n + return g + end + if g == n + degenerate = true + break + end + acc = one(T) + batch_count = 0 + end + end + + degenerate && continue + + if batch_count > 0 + g = gcd(acc, n) + 1 < g < n && return g + end + end + return nothing +end + """ ecm_factor(n::BigInt, B1::Int, num_curves::Int) -> Union{BigInt, Nothing} -Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method. -Computes [m]P where m = lcm(1..B1) = prod(p^floor(log_p(B1)) for p prime ≤ B1). -Uses batched gcd (accumulate Z coordinates, check periodically) to reduce gcd calls. -Returns a factor or `nothing` if none found within the curve budget. +BigInt-optimised ECM using in-place GMP arithmetic to minimise allocations in the +hot Montgomery ladder loop. For fixed-width integer types use the generic method. + +Returns a non-trivial factor of `n`, or `nothing` if none is found within the budget. """ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} - # Precompute prime powers for Stage 1 - prime_powers = BigInt[] + # Precompute prime powers for Stage 1. + prime_powers = Int[] for p in primes(B1) - pk = BigInt(p) + pk = p while pk * p <= B1 pk *= p end push!(prime_powers, pk) end - buf = ECMBuffers() - Q_X = BigInt() - Q_Z = BigInt() - tmp_mul = BigInt() # scratch for acc * Q.Z + buf = ECMBuffers() + Q_X = BigInt() + Q_Z = BigInt() + tmp_mul = BigInt() for _ in 1:num_curves - # Generate random curve via σ parameter (Suyama's parametrization) - σ = BigInt(rand(6:10^9)) - u = mod(σ * σ - 5, n) - v = mod(4 * σ, n) - x0 = mod(u * u * u, n) - z0 = mod(v * v * v, n) - + # Suyama's parametrization. + σ = BigInt(rand(6:10^9)) + u = mod(σ * σ - 5, n) + v = mod(4 * σ, n) + x0 = mod(u * u * u, n) + z0 = mod(v * v * v, n) vu_diff = mod(v - u, n) a24_num = mod(vu_diff^3 * mod(3 * u + v, n), n) a24_den = mod(16 * x0 * v, n) g = gcd(a24_den, n) - if g > 1 && g < n + if 1 < g < n return g end - if g == n - continue - end + g == n && continue a24_den_inv = invmod(a24_den, n) - a24 = mod(a24_num * a24_den_inv, n) + a24 = mod(a24_num * a24_den_inv, n) Base.GMP.MPZ.set!(Q_X, x0) Base.GMP.MPZ.set!(Q_Z, z0) - # Stage 1: multiply Q by each prime power, with batched gcd - degenerate = false - acc = BigInt(1) + # Stage 1: multiply Q by each prime power, with batched GCD. + degenerate = false + acc = BigInt(1) batch_count = 0 for pk in prime_powers _ecm_scalar_mul!(Q_X, Q_Z, pk, Q_X, Q_Z, n, a24, buf) Base.GMP.MPZ.mul!(tmp_mul, acc, Q_Z) _mpz_fdiv_r!(acc, tmp_mul, n) batch_count += 1 - if batch_count >= 100 g = gcd(acc, n) - if g > 1 && g < n + if 1 < g < n return g end if g == n @@ -224,9 +350,7 @@ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} if batch_count > 0 g = gcd(acc, n) - if g > 1 && g < n - return g - end + 1 < g < n && return g end end return nothing diff --git a/test/runtests.jl b/test/runtests.jl index e064d5d..a89fc0c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -555,7 +555,7 @@ end end @testset "ECM edge cases" begin - # ECM with a small semi-prime + # ECM with a small semi-prime (BigInt path — GMP in-place) n = big"1000000007" * big"1000000009" result = Primes.ecm_factor(n, 2000, 50) @test result !== nothing @@ -566,3 +566,35 @@ end result2 = Primes.ecm_factor(hard, 100, 1) # May or may not find a factor with just 1 curve — no assertion on result end + +@testset "ECM generic integer types" begin + # UInt128: widen(UInt128) == BigInt in base Julia — exercises the generic + # _mulmod/_addmod/_submod helpers via the BigInt widen path. + p = UInt128(1_000_000_007) * UInt128(999_999_937) + q = UInt128(1_000_000_009) + n = p * q + result = Primes.ecm_factor(n, 2000, 50) + @test result !== nothing + @test mod(n, result) == 0 + @test result isa UInt128 + + # UInt256 / UInt512 from BitIntegers.jl: widen stays in LLVM fixed-width + # integers (UInt256 → UInt512), so no BigInt allocation in the inner loop. + using BitIntegers + p256 = UInt256(1_000_000_007) * UInt256(999_999_937) + q256 = UInt256(1_000_000_009) + n256 = p256 * q256 + r256 = Primes.ecm_factor(n256, 2000, 50) + @test r256 !== nothing + @test mod(n256, r256) == 0 + @test r256 isa UInt256 + + # Int256 (signed BitIntegers type) + p256s = Int256(1_000_000_007) * Int256(999_999_937) + q256s = Int256(1_000_000_009) + n256s = p256s * q256s + r256s = Primes.ecm_factor(n256s, 2000, 50) + @test r256s !== nothing + @test mod(n256s, r256s) == 0 + @test r256s isa Int256 +end From 7d0d007ebad5666c09af6c1d3b26e5dc0c59c37e Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 10:07:21 +0100 Subject: [PATCH 3/8] fix: prevent _addmod overflow for unsigned types near typemax Use 'a >= n - b ? a - (n - b) : a + b' instead of computing a + b first, which could silently wrap around for unsigned T when n is close to typemax(T). Also eliminate redundant _addmod/_submod calls in _ecm_double by computing the sum and difference of PX, PZ once. --- src/ecm.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ecm.jl b/src/ecm.jl index c689255..6d7cb40 100644 --- a/src/ecm.jl +++ b/src/ecm.jl @@ -11,10 +11,10 @@ # ============================================================================= # Modular addition: (a + b) mod n, for a, b ∈ [0, n). -# Safe as long as a + b does not overflow T, which holds when n ≪ typemax(T). +# Avoids computing a + b directly, which could overflow for unsigned T +# when n is close to typemax(T). @inline function _addmod(a::T, b::T, n::T) where {T<:Integer} - r = a + b - r >= n ? r - n : r + a >= n - b ? a - (n - b) : a + b end # Modular subtraction: (a - b) mod n, for a, b ∈ [0, n). @@ -159,8 +159,10 @@ end # Point doubling on Montgomery curve with a24 = (a+2)/4 (generic version). # Returns (res_X, res_Z). function _ecm_double(PX::T, PZ::T, n::T, a24::T) where {T<:Integer} - u = _mulmod(_addmod(PX, PZ, n), _addmod(PX, PZ, n), n) - v = _mulmod(_submod(PX, PZ, n), _submod(PX, PZ, n), n) + s = _addmod(PX, PZ, n) + d = _submod(PX, PZ, n) + u = _mulmod(s, s, n) + v = _mulmod(d, d, n) diff = _submod(u, v, n) resX = _mulmod(u, v, n) resZ = _mulmod(diff, _addmod(v, _mulmod(a24, diff, n), n), n) From 7f077df371ffd5a237d2786bb063c2bce6abc287 Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 10:18:11 +0100 Subject: [PATCH 4/8] refactor: extract shared ECM helpers to reduce code duplication Extract _ecm_prime_powers(B1) and _ecm_suyama(n) shared helpers used by both the generic and BigInt-optimized ecm_factor methods. The Suyama parametrization (curve setup) is not performance-critical, so both paths now share the generic modular arithmetic version. Only the hot Montgomery ladder inner loop retains a separate BigInt in-place implementation. --- src/ecm.jl | 106 +++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/src/ecm.jl b/src/ecm.jl index 6d7cb40..8ca70dc 100644 --- a/src/ecm.jl +++ b/src/ecm.jl @@ -31,6 +31,46 @@ end T(mod(W(a) * W(b), W(n))) end +# ============================================================================= +# Shared helpers used by both the generic and BigInt-optimized ECM paths. +# ============================================================================= + +# Precompute prime powers p^k ≤ B1 for each prime p ≤ B1. +function _ecm_prime_powers(B1::Int) + result = Int[] + for p in primes(B1) + pk = p + while pk * p <= B1 + pk *= p + end + push!(result, pk) + end + result +end + +# Suyama's parametrization: generate a random Montgomery curve and initial point. +# Returns (x0, z0, a24) on success, a lucky factor g::T, or nothing for a +# degenerate curve (gcd == n). +function _ecm_suyama(n::T) where {T<:Integer} + σ = T(rand(6:10^9)) + u = _submod(_mulmod(σ, σ, n), T(5), n) + v = _mulmod(T(4), σ, n) + x0 = _mulmod(_mulmod(u, u, n), u, n) + z0 = _mulmod(_mulmod(v, v, n), v, n) + + vu_diff = _submod(v, u, n) + vu_diff3 = _mulmod(_mulmod(vu_diff, vu_diff, n), vu_diff, n) + a24_num = _mulmod(vu_diff3, _addmod(_mulmod(T(3), u, n), v, n), n) + a24_den = _mulmod(_mulmod(T(16), x0, n), v, n) + + g = gcd(a24_den, n) + 1 < g < n && return g + g == n && return nothing + + a24 = _mulmod(a24_num, invmod(a24_den, n), n) + (x0, z0, a24) +end + # ============================================================================= # GMP-optimized path for BigInt: in-place arithmetic avoids allocations in # the hot Montgomery ladder loop, which matters for large-integer factoring. @@ -212,37 +252,13 @@ Returns a non-trivial factor of `n`, or `nothing` if none is found within the bu See also the `BigInt`-optimised overload which uses in-place GMP arithmetic. """ function ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} - # Precompute prime powers ≤ B1 as plain Int (they are small). - prime_powers = Int[] - for p in primes(B1) - pk = p - while pk * p <= B1 - pk *= p - end - push!(prime_powers, pk) - end + prime_powers = _ecm_prime_powers(B1) for _ in 1:num_curves - # Suyama's parametrization: generate a random curve and initial point. - σ = T(rand(6:10^9)) - u = _submod(_mulmod(σ, σ, n), T(5), n) # (σ²-5) mod n - v = _mulmod(T(4), σ, n) # 4σ mod n - x0 = _mulmod(_mulmod(u, u, n), u, n) # u³ mod n - z0 = _mulmod(_mulmod(v, v, n), v, n) # v³ mod n - - vu_diff = _submod(v, u, n) - vu_diff3 = _mulmod(_mulmod(vu_diff, vu_diff, n), vu_diff, n) - a24_num = _mulmod(vu_diff3, _addmod(_mulmod(T(3), u, n), v, n), n) - a24_den = _mulmod(_mulmod(T(16), x0, n), v, n) - - g = gcd(a24_den, n) - if 1 < g < n - return g - end - g == n && continue - - a24_den_inv = invmod(a24_den, n) - a24 = _mulmod(a24_num, a24_den_inv, n) + curve = _ecm_suyama(n) + curve === nothing && continue + curve isa Tuple || return curve # lucky factor from gcd + x0, z0, a24 = curve QX, QZ = x0, z0 @@ -287,15 +303,7 @@ hot Montgomery ladder loop. For fixed-width integer types use the generic metho Returns a non-trivial factor of `n`, or `nothing` if none is found within the budget. """ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} - # Precompute prime powers for Stage 1. - prime_powers = Int[] - for p in primes(B1) - pk = p - while pk * p <= B1 - pk *= p - end - push!(prime_powers, pk) - end + prime_powers = _ecm_prime_powers(B1) buf = ECMBuffers() Q_X = BigInt() @@ -303,24 +311,10 @@ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} tmp_mul = BigInt() for _ in 1:num_curves - # Suyama's parametrization. - σ = BigInt(rand(6:10^9)) - u = mod(σ * σ - 5, n) - v = mod(4 * σ, n) - x0 = mod(u * u * u, n) - z0 = mod(v * v * v, n) - vu_diff = mod(v - u, n) - a24_num = mod(vu_diff^3 * mod(3 * u + v, n), n) - a24_den = mod(16 * x0 * v, n) - - g = gcd(a24_den, n) - if 1 < g < n - return g - end - g == n && continue - - a24_den_inv = invmod(a24_den, n) - a24 = mod(a24_num * a24_den_inv, n) + curve = _ecm_suyama(n) + curve === nothing && continue + curve isa Tuple || return curve # lucky factor from gcd + x0, z0, a24 = curve Base.GMP.MPZ.set!(Q_X, x0) Base.GMP.MPZ.set!(Q_Z, z0) From 3c5030fbaa96bf53ecd64920ac44631b9875691e Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 15:25:34 +0100 Subject: [PATCH 5/8] perf: benchmark --- .gitignore | 7 ++ benchmarks/Project.toml | 7 ++ benchmarks/README.md | 61 ++++++++++ benchmarks/ecm_gmp_vs_generic.jl | 116 ++++++++++++++++++ benchmarks/ecm_microbenchmarks.jl | 172 +++++++++++++++++++++++++++ benchmarks/factorization_endtoend.jl | 52 ++++++++ benchmarks/run_all.jl | 25 ++++ docs/make.jl | 3 +- docs/src/benchmarks.md | 61 ++++++++++ 9 files changed, 503 insertions(+), 1 deletion(-) create mode 100644 benchmarks/Project.toml create mode 100644 benchmarks/README.md create mode 100644 benchmarks/ecm_gmp_vs_generic.jl create mode 100644 benchmarks/ecm_microbenchmarks.jl create mode 100644 benchmarks/factorization_endtoend.jl create mode 100644 benchmarks/run_all.jl create mode 100644 docs/src/benchmarks.md diff --git a/.gitignore b/.gitignore index 63d7e8b..c84633f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,14 +14,21 @@ !src/ !src/*.jl + !test/ !test/*.jl + !docs/ !docs/src/ !docs/src/*.md !docs/make.jl !docs/Project.toml +!benchmarks/ +!benchmarks/*.jl +!benchmarks/Project.toml +!benchmarks/README.md + ### Denied even if allowed above ### # Files generated by invoking Julia with --code-coverage diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml new file mode 100644 index 0000000..76683dd --- /dev/null +++ b/benchmarks/Project.toml @@ -0,0 +1,7 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1" +Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" + +[sources] +Primes = {path = ".."} diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..d17802f --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,61 @@ +# Benchmarks + +Benchmark suite for Primes.jl, comparing GMP-optimised in-place arithmetic against +the generic (allocating) code path for BigInt operations, and measuring end-to-end +factorisation performance across different number sizes. + +## Prerequisites + +```bash +julia --project=benchmarks -e 'using Pkg; Pkg.instantiate()' +``` + +## Running + +Run all benchmarks: + +```bash +julia --project=benchmarks benchmarks/run_all.jl +``` + +Or run individual suites: + +```bash +# GMP in-place vs generic (allocating) ECM — the core comparison +julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl + +# Micro-benchmarks for individual ECM operations +julia --project=benchmarks benchmarks/ecm_microbenchmarks.jl + +# End-to-end factor(n) at various sizes +julia --project=benchmarks benchmarks/factorization_endtoend.jl +``` + +## Benchmark Suites + +### `ecm_gmp_vs_generic.jl` + +Compares the two ECM code paths for `BigInt` inputs: + +| Path | Style | Allocations | +|------|-------|-------------| +| `ecm_factor(n::BigInt, ...)` | In-place GMP (`Base.GMP.MPZ.*!`) | Near-zero in hot loop | +| Generic via `_ecm_scalar_mul` | Functional (new BigInt per op) | O(k) per scalar multiply | + +Tests at two scales: a ~40-digit and a ~55-digit semiprime. + +### `ecm_microbenchmarks.jl` + +Benchmarks individual ECM building blocks across three execution modes: + +| Operation | GMP in-place | Generic BigInt | UInt128 (LLVM) | +|-----------|-------------|----------------|----------------| +| `_mulmod` | `_mulmod!` | `_mulmod` | `_mulmod` | +| `_ecm_add` | `_ecm_add!` | `_ecm_add` | `_ecm_add` | +| `_ecm_double` | `_ecm_double!` | `_ecm_double` | `_ecm_double` | +| `_ecm_scalar_mul` | `_ecm_scalar_mul!` | `_ecm_scalar_mul` | `_ecm_scalar_mul` | + +### `factorization_endtoend.jl` + +Benchmarks `factor(n)` for semiprimes ranging from 12 to 45+ digits, +showing how the polyalgorithm (trial division → Pollard rho → ECM → MPQS) scales. diff --git a/benchmarks/ecm_gmp_vs_generic.jl b/benchmarks/ecm_gmp_vs_generic.jl new file mode 100644 index 0000000..0598878 --- /dev/null +++ b/benchmarks/ecm_gmp_vs_generic.jl @@ -0,0 +1,116 @@ +# ECM Benchmark: GMP In-Place vs Generic (Allocating) BigInt Path +# +# Compares the two ECM code paths for BigInt inputs: +# 1. BigInt-specialised: uses in-place GMP arithmetic (zero-allocation hot loop) +# 2. Generic: functional style, allocates new BigInts per operation +# +# Usage: +# julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl + +using Primes +using BenchmarkTools + +# Reproduce the generic (allocating) ECM path for BigInt, bypassing the +# BigInt-specialised method that normally intercepts dispatch. +function ecm_factor_generic(n::BigInt, B1::Int, num_curves::Int) + prime_powers = Primes._ecm_prime_powers(B1) + T = BigInt + for _ in 1:num_curves + curve = Primes._ecm_suyama(n) + curve === nothing && continue + curve isa Tuple || return curve + x0, z0, a24 = curve + + QX, QZ = x0, z0 + degenerate = false + acc = one(T) + batch_count = 0 + for pk in prime_powers + QX, QZ = Primes._ecm_scalar_mul(pk, QX, QZ, n, a24) + acc = Primes._mulmod(acc, QZ, n) + batch_count += 1 + if batch_count >= 100 + g = gcd(acc, n) + if 1 < g < n + return g + end + if g == n + degenerate = true + break + end + acc = one(T) + batch_count = 0 + end + end + degenerate && continue + if batch_count > 0 + g = gcd(acc, n) + 1 < g < n && return g + end + end + return nothing +end + +# --------------------------------------------------------------------------- +# Test cases at different scales +# --------------------------------------------------------------------------- +const CASES = [ + ( + name = "small (~40-digit semiprime)", + n = big"824633720831" * big"1000000007", + B1 = 2_000, + curves = 50, + ), + ( + name = "medium (~55-digit semiprime)", + n = big"780002082420426809" * big"810735269523504809437013569", + B1 = 11_000, + curves = 200, + ), +] + +# --------------------------------------------------------------------------- +# Run benchmarks +# --------------------------------------------------------------------------- +println("=" ^ 70) +println(" ECM Benchmark: GMP In-Place vs Generic (Allocating) BigInt") +println("=" ^ 70) + +for case in CASES + (; name, n, B1, curves) = case + println("\n--- $name ---") + println(" n = $n ($(ndigits(n)) digits)") + println(" B1 = $B1") + println(" curves = $curves") + + # Warm-up both paths + Primes.ecm_factor(n, B1, curves) + ecm_factor_generic(n, B1, curves) + + println("\n [GMP in-place]") + b_gmp = @benchmark Primes.ecm_factor($n, $B1, $curves) samples=20 evals=1 + display(b_gmp) + + println("\n [Generic (allocating)]") + b_gen = @benchmark ecm_factor_generic($n, $B1, $curves) samples=20 evals=1 + display(b_gen) + + med_gmp = median(b_gmp).time / 1e6 + med_gen = median(b_gen).time / 1e6 + alloc_gmp = median(b_gmp).memory / 1024 + alloc_gen = median(b_gen).memory / 1024 + + println("\n Summary (median):") + println(" GMP in-place : $(round(med_gmp; digits=2)) ms, $(round(alloc_gmp; digits=1)) KiB") + println(" Generic alloc: $(round(med_gen; digits=2)) ms, $(round(alloc_gen; digits=1)) KiB") + if med_gmp > 0 + println(" Speedup : $(round(med_gen / med_gmp; digits=2))×") + end + if alloc_gen > 0 + println(" Memory saved : $(round((1 - alloc_gmp / alloc_gen) * 100; digits=1))%") + end +end + +println("\n" * "=" ^ 70) +println(" Done.") +println("=" ^ 70) diff --git a/benchmarks/ecm_microbenchmarks.jl b/benchmarks/ecm_microbenchmarks.jl new file mode 100644 index 0000000..8720111 --- /dev/null +++ b/benchmarks/ecm_microbenchmarks.jl @@ -0,0 +1,172 @@ +# ECM Micro-Benchmarks +# +# Benchmarks individual ECM building blocks: +# - _mulmod / _mulmod! (modular multiplication) +# - _ecm_add / _ecm_add! (differential point addition) +# - _ecm_double / _ecm_double! (point doubling) +# - _ecm_scalar_mul / _ecm_scalar_mul! (Montgomery ladder) +# +# Each operation is tested in four modes: +# 1. BigInt GMP in-place (specialised path) +# 2. BigInt generic (allocating, functional) +# 3. UInt128 generic (note: widen(UInt128) == BigInt, so still allocates) +# 4. UInt256 generic via BitIntegers.jl (widen stays in LLVM fixed-width) +# +# Usage: +# julia --project=benchmarks benchmarks/ecm_microbenchmarks.jl + +using Primes +using BenchmarkTools +using BitIntegers + +# --------------------------------------------------------------------------- +# Setup: representative values for a ~40-digit modulus +# --------------------------------------------------------------------------- +const N_BIG = big"824633720831" * big"1000000007" +const A24_BIG = mod(big"123456789012345", N_BIG) +const PX_BIG = mod(big"314159265358979323846", N_BIG) +const PZ_BIG = mod(big"271828182845904523536", N_BIG) +const QX_BIG = mod(big"141421356237309504880", N_BIG) +const QZ_BIG = mod(big"173205080756887729352", N_BIG) +const DX_BIG = mod(big"223606797749978969640", N_BIG) +const DZ_BIG = mod(big"264575131106459059050", N_BIG) + +# UInt128 versions (note: widen(UInt128) == BigInt in base Julia) +const N_U128 = UInt128(824633720831) * UInt128(1000000007) +const A24_U128 = UInt128(mod(big"123456789012345", big(N_U128))) +const PX_U128 = UInt128(mod(big"314159265358979323846", big(N_U128))) +const PZ_U128 = UInt128(mod(big"271828182845904523536", big(N_U128))) +const QX_U128 = UInt128(mod(big"141421356237309504880", big(N_U128))) +const QZ_U128 = UInt128(mod(big"173205080756887729352", big(N_U128))) +const DX_U128 = UInt128(mod(big"223606797749978969640", big(N_U128))) +const DZ_U128 = UInt128(mod(big"264575131106459059050", big(N_U128))) + +# UInt256 versions (widen(UInt256) == UInt512, stays in LLVM fixed-width) +const N_U256 = UInt256(824633720831) * UInt256(1000000007) +const A24_U256 = UInt256(mod(big"123456789012345", big(N_U256))) +const PX_U256 = UInt256(mod(big"314159265358979323846", big(N_U256))) +const PZ_U256 = UInt256(mod(big"271828182845904523536", big(N_U256))) +const QX_U256 = UInt256(mod(big"141421356237309504880", big(N_U256))) +const QZ_U256 = UInt256(mod(big"173205080756887729352", big(N_U256))) +const DX_U256 = UInt256(mod(big"223606797749978969640", big(N_U256))) +const DZ_U256 = UInt256(mod(big"264575131106459059050", big(N_U256))) + +# GMP buffers +const BUF = Primes.ECMBuffers() +const TMP_MUL = BigInt() + +println("=" ^ 70) +println(" ECM Micro-Benchmarks") +println("=" ^ 70) +println() +println(" Note: widen(UInt128) == BigInt → still allocates (no LLVM benefit)") +println(" widen(UInt256) == UInt512 → pure LLVM fixed-width arithmetic") + +# --------------------------------------------------------------------------- +# 1. Modular multiplication +# --------------------------------------------------------------------------- +println("\n--- _mulmod ---") + +print(" BigInt GMP in-place (_mulmod!): ") +dst = BigInt() +b1 = @benchmark Primes._mulmod!($dst, $PX_BIG, $PZ_BIG, $N_BIG, $TMP_MUL) +println("$(round(median(b1).time; digits=1)) ns, $(median(b1).memory) bytes") + +print(" BigInt generic (_mulmod): ") +b2 = @benchmark Primes._mulmod($PX_BIG, $PZ_BIG, $N_BIG) +println("$(round(median(b2).time; digits=1)) ns, $(median(b2).memory) bytes") + +print(" UInt128 generic (_mulmod): ") +b3 = @benchmark Primes._mulmod($PX_U128, $PZ_U128, $N_U128) +println("$(round(median(b3).time; digits=1)) ns, $(median(b3).memory) bytes") + +print(" UInt256 generic (_mulmod): ") +b3b = @benchmark Primes._mulmod($PX_U256, $PZ_U256, $N_U256) +println("$(round(median(b3b).time; digits=1)) ns, $(median(b3b).memory) bytes") + +# --------------------------------------------------------------------------- +# 2. Differential point addition +# --------------------------------------------------------------------------- +println("\n--- _ecm_add ---") + +res_X, res_Z = BigInt(), BigInt() + +print(" BigInt GMP in-place (_ecm_add!): ") +b4 = @benchmark Primes._ecm_add!($res_X, $res_Z, + $PX_BIG, $PZ_BIG, $QX_BIG, $QZ_BIG, $DX_BIG, $DZ_BIG, $N_BIG, $BUF) +println("$(round(median(b4).time; digits=1)) ns, $(median(b4).memory) bytes") + +print(" BigInt generic (_ecm_add): ") +b5 = @benchmark Primes._ecm_add($PX_BIG, $PZ_BIG, $QX_BIG, $QZ_BIG, $DX_BIG, $DZ_BIG, $N_BIG) +println("$(round(median(b5).time; digits=1)) ns, $(median(b5).memory) bytes") + +print(" UInt128 generic (_ecm_add): ") +b6 = @benchmark Primes._ecm_add($PX_U128, $PZ_U128, $QX_U128, $QZ_U128, $DX_U128, $DZ_U128, $N_U128) +println("$(round(median(b6).time; digits=1)) ns, $(median(b6).memory) bytes") + +print(" UInt256 generic (_ecm_add): ") +b6b = @benchmark Primes._ecm_add($PX_U256, $PZ_U256, $QX_U256, $QZ_U256, $DX_U256, $DZ_U256, $N_U256) +println("$(round(median(b6b).time; digits=1)) ns, $(median(b6b).memory) bytes") + +# --------------------------------------------------------------------------- +# 3. Point doubling +# --------------------------------------------------------------------------- +println("\n--- _ecm_double ---") + +print(" BigInt GMP in-place (_ecm_double!): ") +b7 = @benchmark Primes._ecm_double!($res_X, $res_Z, $PX_BIG, $PZ_BIG, $N_BIG, $A24_BIG, $BUF) +println("$(round(median(b7).time; digits=1)) ns, $(median(b7).memory) bytes") + +print(" BigInt generic (_ecm_double): ") +b8 = @benchmark Primes._ecm_double($PX_BIG, $PZ_BIG, $N_BIG, $A24_BIG) +println("$(round(median(b8).time; digits=1)) ns, $(median(b8).memory) bytes") + +print(" UInt128 generic (_ecm_double): ") +b9 = @benchmark Primes._ecm_double($PX_U128, $PZ_U128, $N_U128, $A24_U128) +println("$(round(median(b9).time; digits=1)) ns, $(median(b9).memory) bytes") + +print(" UInt256 generic (_ecm_double): ") +b9b = @benchmark Primes._ecm_double($PX_U256, $PZ_U256, $N_U256, $A24_U256) +println("$(round(median(b9b).time; digits=1)) ns, $(median(b9b).memory) bytes") + +# --------------------------------------------------------------------------- +# 4. Montgomery ladder scalar multiplication +# --------------------------------------------------------------------------- +println("\n--- _ecm_scalar_mul (k=997, prime) ---") + +const K = 997 + +print(" BigInt GMP in-place (_ecm_scalar_mul!): ") +b10 = @benchmark Primes._ecm_scalar_mul!($res_X, $res_Z, $K, $PX_BIG, $PZ_BIG, $N_BIG, $A24_BIG, $BUF) +println("$(round(median(b10).time / 1e3; digits=2)) μs, $(median(b10).memory) bytes") + +print(" BigInt generic (_ecm_scalar_mul): ") +b11 = @benchmark Primes._ecm_scalar_mul($K, $PX_BIG, $PZ_BIG, $N_BIG, $A24_BIG) +println("$(round(median(b11).time / 1e3; digits=2)) μs, $(median(b11).memory) bytes") + +print(" UInt128 generic (_ecm_scalar_mul): ") +b12 = @benchmark Primes._ecm_scalar_mul($K, $PX_U128, $PZ_U128, $N_U128, $A24_U128) +println("$(round(median(b12).time / 1e3; digits=2)) μs, $(median(b12).memory) bytes") + +print(" UInt256 generic (_ecm_scalar_mul): ") +b12b = @benchmark Primes._ecm_scalar_mul($K, $PX_U256, $PZ_U256, $N_U256, $A24_U256) +println("$(round(median(b12b).time / 1e3; digits=2)) μs, $(median(b12b).memory) bytes") + +# --------------------------------------------------------------------------- +# Summary table +# --------------------------------------------------------------------------- +println("\n" * "=" ^ 70) +println(" Summary: median time") +println(" (GMP in-place → Generic BigInt → UInt128 → UInt256)") +println("=" ^ 70) + +function fmt_speedup(base, other) + s = other / base + "$(round(s; digits=1))×" +end + +println(" _mulmod: $(round(median(b1).time; digits=0)) ns → $(round(median(b2).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b2).time))) → $(round(median(b3).time; digits=0)) ns → $(round(median(b3b).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b3b).time)) vs GMP)") +println(" _ecm_add: $(round(median(b4).time; digits=0)) ns → $(round(median(b5).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b5).time))) → $(round(median(b6).time; digits=0)) ns → $(round(median(b6b).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b6b).time)) vs GMP)") +println(" _ecm_double: $(round(median(b7).time; digits=0)) ns → $(round(median(b8).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b8).time))) → $(round(median(b9).time; digits=0)) ns → $(round(median(b9b).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b9b).time)) vs GMP)") +println(" _ecm_scalar_mul: $(round(median(b10).time/1e3; digits=1)) μs → $(round(median(b11).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b11).time))) → $(round(median(b12).time/1e3; digits=1)) μs → $(round(median(b12b).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b12b).time)) vs GMP)") +println("=" ^ 70) diff --git a/benchmarks/factorization_endtoend.jl b/benchmarks/factorization_endtoend.jl new file mode 100644 index 0000000..17887cf --- /dev/null +++ b/benchmarks/factorization_endtoend.jl @@ -0,0 +1,52 @@ +# End-to-End Factorization Benchmarks +# +# Benchmarks `factor(n)` for semiprimes of increasing size to show the +# polyalgorithm's performance across different regimes: +# - Small (< 20 digits): trial division + Pollard rho +# - Medium (20-40 digits): ECM kicks in +# - Large (40-60 digits): ECM + MPQS +# - Very large (60+ digits): MPQS-dominated +# +# Usage: +# julia --project=benchmarks benchmarks/factorization_endtoend.jl + +using Primes +using BenchmarkTools + +const CASES = [ + (name = "~12 digits", n = big"824633720831"), + (name = "~20 digits", n = big"99999999999999999877" ), # 20-digit semiprime-like + (name = "~30 digits", n = big"824633720831" * big"1000000007"), + (name = "~40 digits", n = big"2152302898747" * big"99999999999999999877"), + (name = "~45 digits", n = big"780002082420426809" * big"810735269523504809437013569"), +] + +println("=" ^ 70) +println(" End-to-End factor(n) Benchmarks") +println("=" ^ 70) + +for case in CASES + (; name, n) = case + println("\n--- $name ($(ndigits(n)) digits) ---") + println(" n = $n") + + # Warm-up and verify correctness + f = factor(n) + @assert prodfactors(f) == n "Factorization verification failed for $name" + println(" factor(n) = $f") + + # Benchmark + samples = ndigits(n) > 40 ? 5 : 20 + b = @benchmark factor($n) samples=samples evals=1 + display(b) + + med = median(b).time + unit = med < 1e6 ? ("$(round(med / 1e3; digits=2)) μs") : + med < 1e9 ? ("$(round(med / 1e6; digits=2)) ms") : + ("$(round(med / 1e9; digits=2)) s") + println(" Median: $unit, $(round(median(b).memory / 1024; digits=1)) KiB allocated") +end + +println("\n" * "=" ^ 70) +println(" Done.") +println("=" ^ 70) diff --git a/benchmarks/run_all.jl b/benchmarks/run_all.jl new file mode 100644 index 0000000..90e8c83 --- /dev/null +++ b/benchmarks/run_all.jl @@ -0,0 +1,25 @@ +# Run All Benchmarks +# +# Usage: +# julia --project=benchmarks benchmarks/run_all.jl +# +# Or run individual benchmarks: +# julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl +# julia --project=benchmarks benchmarks/ecm_microbenchmarks.jl +# julia --project=benchmarks benchmarks/factorization_endtoend.jl + +const BENCHDIR = @__DIR__ + +const SCRIPTS = [ + "ecm_microbenchmarks.jl", + "ecm_gmp_vs_generic.jl", + "factorization_endtoend.jl", +] + +for script in SCRIPTS + path = joinpath(BENCHDIR, script) + println("\n" * "#" ^ 70) + println("# Running: $script") + println("#" ^ 70 * "\n") + include(path) +end diff --git a/docs/make.jl b/docs/make.jl index 73117d9..a0552bd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,8 @@ makedocs( checkdocs = :none, pages = Any[ "Home" => "index.md", - "Functions" => "api.md" + "Functions" => "api.md", + "Benchmarks" => "benchmarks.md" ], ) diff --git a/docs/src/benchmarks.md b/docs/src/benchmarks.md new file mode 100644 index 0000000..cd8fc34 --- /dev/null +++ b/docs/src/benchmarks.md @@ -0,0 +1,61 @@ +# Benchmarks + +Primes.jl ships a benchmark suite in `benchmarks/` for measuring the performance of +its factorisation algorithms, and comparing the GMP-optimised BigInt code path against +the generic (allocating) implementation. + +## Running Benchmarks + +Install dependencies once: + +```bash +julia --project=benchmarks -e 'using Pkg; Pkg.instantiate()' +``` + +Run all benchmarks: + +```bash +julia --project=benchmarks benchmarks/run_all.jl +``` + +Or run individual suites: + +```bash +julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl +julia --project=benchmarks benchmarks/ecm_microbenchmarks.jl +julia --project=benchmarks benchmarks/factorization_endtoend.jl +``` + +## Benchmark Suites + +### ECM: GMP In-Place vs Generic + +`ecm_gmp_vs_generic.jl` compares the two ECM code paths for `BigInt` inputs: + +- **GMP in-place** (`ecm_factor(n::BigInt, ...)`): Uses `Base.GMP.MPZ.*!` functions for + zero-allocation arithmetic in the Montgomery ladder hot loop. +- **Generic** (via `_ecm_scalar_mul`): Functional style that allocates a new `BigInt` for + every modular operation. + +### ECM Micro-Benchmarks + +`ecm_microbenchmarks.jl` measures individual ECM building blocks across four execution +modes: + +| Operation | GMP in-place | Generic BigInt | UInt128 | UInt256 (LLVM) | +|-----------|-------------|----------------|---------|----------------| +| Modular multiply | `_mulmod!` | `_mulmod` | `_mulmod` | `_mulmod` | +| Point addition | `_ecm_add!` | `_ecm_add` | `_ecm_add` | `_ecm_add` | +| Point doubling | `_ecm_double!` | `_ecm_double` | `_ecm_double` | `_ecm_double` | +| Scalar multiply | `_ecm_scalar_mul!` | `_ecm_scalar_mul` | `_ecm_scalar_mul` | `_ecm_scalar_mul` | + +!!! note + `widen(UInt128) == BigInt` in base Julia, so UInt128 still allocates through BigInt. + `widen(UInt256) == UInt512` via BitIntegers.jl, giving pure LLVM fixed-width arithmetic + with zero allocations — but GMP's hand-tuned assembly is faster per-operation. + +### End-to-End Factorisation + +`factorization_endtoend.jl` benchmarks `factor(n)` for semiprimes ranging from 12 to 45+ +digits, showing how the polyalgorithm (trial division → Pollard rho → ECM → MPQS) scales +with input size. From 48e23f92ac9af0353a43e2ed97b8223d8ae19b93 Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 15:34:24 +0100 Subject: [PATCH 6/8] perf: benchmark - fix speedup calc --- benchmarks/ecm_gmp_vs_generic.jl | 2 +- benchmarks/ecm_microbenchmarks.jl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/benchmarks/ecm_gmp_vs_generic.jl b/benchmarks/ecm_gmp_vs_generic.jl index 0598878..bcb4de2 100644 --- a/benchmarks/ecm_gmp_vs_generic.jl +++ b/benchmarks/ecm_gmp_vs_generic.jl @@ -104,7 +104,7 @@ for case in CASES println(" GMP in-place : $(round(med_gmp; digits=2)) ms, $(round(alloc_gmp; digits=1)) KiB") println(" Generic alloc: $(round(med_gen; digits=2)) ms, $(round(alloc_gen; digits=1)) KiB") if med_gmp > 0 - println(" Speedup : $(round(med_gen / med_gmp; digits=2))×") + println(" Speedup : $(round(med_gmp / med_gen; digits=2))×") end if alloc_gen > 0 println(" Memory saved : $(round((1 - alloc_gmp / alloc_gen) * 100; digits=1))%") diff --git a/benchmarks/ecm_microbenchmarks.jl b/benchmarks/ecm_microbenchmarks.jl index 8720111..613a0f4 100644 --- a/benchmarks/ecm_microbenchmarks.jl +++ b/benchmarks/ecm_microbenchmarks.jl @@ -161,12 +161,12 @@ println(" (GMP in-place → Generic BigInt → UInt128 → UInt256)") println("=" ^ 70) function fmt_speedup(base, other) - s = other / base - "$(round(s; digits=1))×" + s = base / other + "$(round(s; digits=2))×" end -println(" _mulmod: $(round(median(b1).time; digits=0)) ns → $(round(median(b2).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b2).time))) → $(round(median(b3).time; digits=0)) ns → $(round(median(b3b).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b3b).time)) vs GMP)") -println(" _ecm_add: $(round(median(b4).time; digits=0)) ns → $(round(median(b5).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b5).time))) → $(round(median(b6).time; digits=0)) ns → $(round(median(b6b).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b6b).time)) vs GMP)") -println(" _ecm_double: $(round(median(b7).time; digits=0)) ns → $(round(median(b8).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b8).time))) → $(round(median(b9).time; digits=0)) ns → $(round(median(b9b).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b9b).time)) vs GMP)") -println(" _ecm_scalar_mul: $(round(median(b10).time/1e3; digits=1)) μs → $(round(median(b11).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b11).time))) → $(round(median(b12).time/1e3; digits=1)) μs → $(round(median(b12b).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b12b).time)) vs GMP)") +println(" _mulmod: $(round(median(b1).time; digits=0)) ns → $(round(median(b2).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b2).time))) → $(round(median(b3).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b3).time))) → $(round(median(b3b).time; digits=0)) ns ($(fmt_speedup(median(b1).time, median(b3b).time)))") +println(" _ecm_add: $(round(median(b4).time; digits=0)) ns → $(round(median(b5).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b5).time))) → $(round(median(b6).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b6).time))) → $(round(median(b6b).time; digits=0)) ns ($(fmt_speedup(median(b4).time, median(b6b).time)))") +println(" _ecm_double: $(round(median(b7).time; digits=0)) ns → $(round(median(b8).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b8).time))) → $(round(median(b9).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b9).time))) → $(round(median(b9b).time; digits=0)) ns ($(fmt_speedup(median(b7).time, median(b9b).time)))") +println(" _ecm_scalar_mul: $(round(median(b10).time/1e3; digits=1)) μs → $(round(median(b11).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b11).time))) → $(round(median(b12).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b12).time))) → $(round(median(b12b).time/1e3; digits=1)) μs ($(fmt_speedup(median(b10).time, median(b12b).time)))") println("=" ^ 70) From fbd2b2b557c4acbc1337dbcc6e898ea0da25107b Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Thu, 12 Mar 2026 14:00:51 -0400 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Oscar Smith --- src/ecm.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ecm.jl b/src/ecm.jl index 8ca70dc..2973647 100644 --- a/src/ecm.jl +++ b/src/ecm.jl @@ -27,8 +27,7 @@ end # Uses widen(T) to avoid overflow for fixed-width integers. # For BigInt, widen returns BigInt itself, so this is simply mod(a * b, n). @inline function _mulmod(a::T, b::T, n::T) where {T<:Integer} - W = widen(T) - T(mod(W(a) * W(b), W(n))) + mod(a * b, n) end # ============================================================================= From 684164827b760b952dba582f04445675f3ffcbe1 Mon Sep 17 00:00:00 2001 From: s-celles Date: Thu, 12 Mar 2026 22:04:57 +0100 Subject: [PATCH 8/8] feat: stage 2 continuation --- src/ecm.jl | 239 ++++++++++++++++++++++++++++++++++++++++++++++- test/runtests.jl | 78 ++++++++++++++++ 2 files changed, 314 insertions(+), 3 deletions(-) diff --git a/src/ecm.jl b/src/ecm.jl index 2973647..0aed909 100644 --- a/src/ecm.jl +++ b/src/ecm.jl @@ -229,6 +229,224 @@ function _ecm_scalar_mul(k::Integer, PX::T, PZ::T, n::T, a24::T) where {T<:Integ R0X, R0Z end +# ============================================================================= +# Stage 2: standard continuation (baby-step giant-step). +# +# After Stage 1 computes Q = [∏ p^k ≤ B1] P, Stage 2 checks whether the +# group order has exactly one prime factor p in (B1, B2]. Uses a stride D +# and precomputed baby-step table [d]Q for d ∈ {1,3,5,...,D-1} with gcd(d,6)=1 +# to cover all primes in the interval with only differential additions. +# +# Ref: Montgomery (1987) §10, Brent (1986) "Some integer factorization +# algorithms using elliptic curves". +# ============================================================================= + +# Baby-step offsets: odd values d with 1 ≤ d < D and gcd(d, 6) == 1. +function _stage2_baby_offsets(D::Int) + [d for d in 1:2:(D-1) if gcd(d, 6) == 1] +end + +# Generic Stage 2 continuation. +# Returns a non-trivial factor of n, or nothing. +function _ecm_stage2(QX::T, QZ::T, n::T, a24::T, B1::Int, B2::Int) where {T<:Integer} + D = 2 * isqrt(B2 - B1 + 1) + D = max(D, 6) + # Round D up to next multiple of 6 for alignment with prime residues. + D = D + (6 - mod(D, 6)) % 6 + + offsets = _stage2_baby_offsets(D) + isempty(offsets) && return nothing + + # Precompute baby-step table: baby[d] = [d]Q for each offset d. + # Also need [2]Q for building the table via differential addition chain. + S2X, S2Z = _ecm_double(QX, QZ, n, a24) # [2]Q + + # Build multiples of Q: muls[k] = [k]Q for k = 1, 2, ..., max(D-1, D) + # We only need odd multiples with gcd(d,6)==1, but building a full table + # up to D is simpler and the table is small. + max_d = maximum(offsets) + # mulX[k], mulZ[k] = [k]Q + mulX = Vector{T}(undef, max_d) + mulZ = Vector{T}(undef, max_d) + mulX[1], mulZ[1] = QX, QZ + if max_d >= 2 + mulX[2], mulZ[2] = S2X, S2Z + end + for k in 3:max_d + # [k]Q = [k-1]Q + [1]Q, diff = [k-2]Q + mulX[k], mulZ[k] = _ecm_add(mulX[k-1], mulZ[k-1], QX, QZ, + mulX[k-2], mulZ[k-2], n) + end + + # Precompute [D]Q for giant steps. + # Build [D]Q using the ladder. + DX, DZ = _ecm_scalar_mul(D, QX, QZ, n, a24) + + # Giant step: start at the first multiple of D above B1. + # For each giant step center c = j*D, check primes c ± d for d ∈ offsets. + j_start = div(B1, D) + 1 + j_end = div(B2, D) + 1 + + # Compute R = [j_start * D]Q + RX, RZ = _ecm_scalar_mul(j_start * D, QX, QZ, n, a24) + + # We also need the previous giant step for differential addition: + # Rprev = [(j_start - 1) * D]Q + RprevX, RprevZ = _ecm_scalar_mul((j_start - 1) * D, QX, QZ, n, a24) + + acc = one(T) + batch_count = 0 + + for j in j_start:j_end + c = j * D + for d in offsets + p_plus = c + d + p_minus = c - d + # Only check values in (B1, B2]. + if B1 < p_plus <= B2 + # The factor reveals itself when [p]Q = O, i.e. Z = 0. + # With R = [c]Q, baby = [d]Q: + # [c+d]Q and [c-d]Q share the property that their Z-coords + # vanish iff c±d divides the group order. + # The key identity: for Montgomery curves, + # X([c]Q) * Z([d]Q) - Z([c]Q) * X([d]Q) = 0 + # iff [c-d]Q or [c+d]Q is the point at infinity. + # We accumulate: acc *= (R_X * baby_Z - R_Z * baby_X) + val = _submod(_mulmod(RX, mulZ[d], n), _mulmod(RZ, mulX[d], n), n) + acc = _mulmod(acc, val, n) + batch_count += 1 + end + if B1 < p_minus <= B2 && p_minus != p_plus + val = _submod(_mulmod(RX, mulZ[d], n), _mulmod(RZ, mulX[d], n), n) + # For c-d we use the same identity (same expression). + acc = _mulmod(acc, val, n) + batch_count += 1 + end + end + + if batch_count >= 100 + g = gcd(acc, n) + 1 < g < n && return g + g == n && return nothing # degenerate + acc = one(T) + batch_count = 0 + end + + # Advance giant step: R_next = R + [D]Q, diff = R_prev + if j < j_end + RnextX, RnextZ = _ecm_add(RX, RZ, DX, DZ, RprevX, RprevZ, n) + RprevX, RprevZ = RX, RZ + RX, RZ = RnextX, RnextZ + end + end + + # Final GCD check + if batch_count > 0 + g = gcd(acc, n) + 1 < g < n && return g + end + return nothing +end + +# BigInt GMP in-place Stage 2 continuation. +function _ecm_stage2!(Q_X::BigInt, Q_Z::BigInt, n::BigInt, a24::BigInt, + B1::Int, B2::Int, buf::ECMBuffers)::Union{BigInt, Nothing} + D = 2 * isqrt(B2 - B1 + 1) + D = max(D, 6) + D = D + (6 - mod(D, 6)) % 6 + + offsets = _stage2_baby_offsets(D) + isempty(offsets) && return nothing + + max_d = maximum(offsets) + + # Precompute baby-step table (vectors of BigInt). + mulX = [BigInt() for _ in 1:max_d] + mulZ = [BigInt() for _ in 1:max_d] + Base.GMP.MPZ.set!(mulX[1], Q_X) + Base.GMP.MPZ.set!(mulZ[1], Q_Z) + + # [2]Q + S2X, S2Z = BigInt(), BigInt() + _ecm_double!(S2X, S2Z, Q_X, Q_Z, n, a24, buf) + if max_d >= 2 + Base.GMP.MPZ.set!(mulX[2], S2X) + Base.GMP.MPZ.set!(mulZ[2], S2Z) + end + for k in 3:max_d + _ecm_add!(mulX[k], mulZ[k], mulX[k-1], mulZ[k-1], Q_X, Q_Z, + mulX[k-2], mulZ[k-2], n, buf) + end + + # [D]Q via scalar multiplication + DX, DZ = BigInt(), BigInt() + _ecm_scalar_mul!(DX, DZ, D, Q_X, Q_Z, n, a24, buf) + + # Giant step state + j_start = div(B1, D) + 1 + j_end = div(B2, D) + 1 + + RX, RZ = BigInt(), BigInt() + _ecm_scalar_mul!(RX, RZ, j_start * D, Q_X, Q_Z, n, a24, buf) + + RprevX, RprevZ = BigInt(), BigInt() + _ecm_scalar_mul!(RprevX, RprevZ, (j_start - 1) * D, Q_X, Q_Z, n, a24, buf) + + RnextX, RnextZ = BigInt(), BigInt() + acc = BigInt(1) + tmp_mul = BigInt() + tmp1 = BigInt() + tmp2 = BigInt() + batch_count = 0 + + for j in j_start:j_end + c = j * D + for d in offsets + p_plus = c + d + p_minus = c - d + if B1 < p_plus <= B2 + # val = R_X * baby_Z - R_Z * baby_X (mod n) + _mulmod!(tmp1, RX, mulZ[d], n, tmp_mul) + _mulmod!(tmp2, RZ, mulX[d], n, tmp_mul) + Base.GMP.MPZ.sub!(tmp1, tmp2) + Base.GMP.MPZ.mul!(tmp_mul, acc, tmp1) + _mpz_fdiv_r!(acc, tmp_mul, n) + batch_count += 1 + end + if B1 < p_minus <= B2 && p_minus != p_plus + _mulmod!(tmp1, RX, mulZ[d], n, tmp_mul) + _mulmod!(tmp2, RZ, mulX[d], n, tmp_mul) + Base.GMP.MPZ.sub!(tmp1, tmp2) + Base.GMP.MPZ.mul!(tmp_mul, acc, tmp1) + _mpz_fdiv_r!(acc, tmp_mul, n) + batch_count += 1 + end + end + + if batch_count >= 100 + g = gcd(acc, n) + 1 < g < n && return g + g == n && return nothing + Base.GMP.MPZ.set_si!(acc, 1) + batch_count = 0 + end + + if j < j_end + _ecm_add!(RnextX, RnextZ, RX, RZ, DX, DZ, RprevX, RprevZ, n, buf) + Base.GMP.MPZ.set!(RprevX, RX) + Base.GMP.MPZ.set!(RprevZ, RZ) + Base.GMP.MPZ.set!(RX, RnextX) + Base.GMP.MPZ.set!(RZ, RnextZ) + end + end + + if batch_count > 0 + g = gcd(acc, n) + 1 < g < n && return g + end + return nothing +end + # ============================================================================= # Public API # ============================================================================= @@ -236,10 +454,12 @@ end """ ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} -> Union{T, Nothing} -Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method (ECM). +Attempt to find a non-trivial factor of `n` using the Elliptic Curve Method (ECM) +with Stage 1 (bound `B1`) and Stage 2 continuation (bound `B2 = 100 * B1`). Uses Montgomery curves with Suyama's parametrization and a batched GCD to reduce -the number of `gcd` calls in the inner loop. +the number of `gcd` calls in the inner loop. Stage 2 uses a baby-step giant-step +approach to efficiently check primes in `(B1, B2]`. This generic method works for any integer type `T`, including fixed-width types from [BitIntegers.jl](https://github.com/rfourquet/BitIntegers.jl) (e.g. `UInt256`, `UInt512`). @@ -252,6 +472,7 @@ See also the `BigInt`-optimised overload which uses in-place GMP arithmetic. """ function ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} prime_powers = _ecm_prime_powers(B1) + B2 = 100 * B1 for _ in 1:num_curves curve = _ecm_suyama(n) @@ -288,7 +509,12 @@ function ecm_factor(n::T, B1::Int, num_curves::Int) where {T<:Integer} if batch_count > 0 g = gcd(acc, n) 1 < g < n && return g + g == n && continue end + + # Stage 2: standard continuation for primes in (B1, B2]. + s2 = _ecm_stage2(QX, QZ, n, a24, B1, B2) + s2 !== nothing && return s2 end return nothing end @@ -297,12 +523,14 @@ end ecm_factor(n::BigInt, B1::Int, num_curves::Int) -> Union{BigInt, Nothing} BigInt-optimised ECM using in-place GMP arithmetic to minimise allocations in the -hot Montgomery ladder loop. For fixed-width integer types use the generic method. +hot Montgomery ladder loop. Includes Stage 2 continuation (`B2 = 100 * B1`) using +baby-step giant-step. For fixed-width integer types use the generic method. Returns a non-trivial factor of `n`, or `nothing` if none is found within the budget. """ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} prime_powers = _ecm_prime_powers(B1) + B2 = 100 * B1 buf = ECMBuffers() Q_X = BigInt() @@ -346,7 +574,12 @@ function ecm_factor(n::BigInt, B1::Int, num_curves::Int)::Union{BigInt, Nothing} if batch_count > 0 g = gcd(acc, n) 1 < g < n && return g + g == n && continue end + + # Stage 2: standard continuation for primes in (B1, B2]. + s2 = _ecm_stage2!(Q_X, Q_Z, n, a24, B1, B2, buf) + s2 !== nothing && return s2 end return nothing end diff --git a/test/runtests.jl b/test/runtests.jl index a89fc0c..e00ebcf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -567,6 +567,84 @@ end # May or may not find a factor with just 1 curve — no assertion on result end +@testset "ECM Stage 2" begin + # Stage 2 should improve success rate for low B1 values where Stage 1 + # alone frequently fails. With B1=200 and B2=100*B1, the continuation + # catches group orders with one large prime factor in (B1, B2]. + + n = big"824633720831" * big"1000000007" + + # With B1=200 and many curves, Stage 2 should find the factor reliably. + # (Stage 1 alone at B1=200 finds it only ~8% of the time per 5-curve attempt.) + result = Primes.ecm_factor(n, 200, 50) + @test result !== nothing + @test mod(n, result) == 0 + + # BigInt path: verify GMP in-place Stage 2 works + n_big = big"1000000007" * big"1000000009" + result_big = Primes.ecm_factor(n_big, 200, 50) + @test result_big !== nothing + @test mod(n_big, result_big) == 0 + + # Generic path: UInt128 + n_u128 = UInt128(1_000_000_007) * UInt128(1_000_000_009) + result_u128 = Primes.ecm_factor(n_u128, 200, 50) + @test result_u128 !== nothing + @test mod(n_u128, result_u128) == 0 + @test result_u128 isa UInt128 +end + +@testset "ECM Stage 2 finds factors Stage 1 misses" begin + # Run Stage 1 alone on many curves with low B1. Collect curves where + # Stage 1 failed (gcd == 1), then run Stage 2 on those and verify it + # finds at least one factor that Stage 1 could not. + n = big"824633720831" * big"1000000007" + B1 = 200 + B2 = 100 * B1 + pp = Primes._ecm_prime_powers(B1) + + stage1_only_found = 0 + stage2_extra_found = 0 + curves_tested = 0 + stage1_missed = 0 + + for _ in 1:200 + curve = Primes._ecm_suyama(n) + curve === nothing && continue + !(curve isa Tuple) && continue + x0, z0, a24 = curve + curves_tested += 1 + + # Run Stage 1 manually + QX, QZ = x0, z0 + for pk in pp + QX, QZ = Primes._ecm_scalar_mul(pk, QX, QZ, n, a24) + end + g1 = gcd(QZ, n) + if 1 < g1 < n + stage1_only_found += 1 + continue + end + + # Stage 1 did not find a factor on this curve + stage1_missed += 1 + + # Now run Stage 2 on the same post-Stage-1 point + s2 = Primes._ecm_stage2(QX, QZ, n, a24, B1, B2) + if s2 !== nothing && 1 < s2 < n + stage2_extra_found += 1 + @test mod(n, s2) == 0 + end + end + + # With B1=200 on this number, Stage 1 finds ~8% of curves. + # Stage 2 should rescue additional curves, proving it adds value. + @test curves_tested >= 100 # enough curves for a meaningful test + @test stage1_missed > 0 # Stage 1 didn't find everything + @test stage2_extra_found > 0 # Stage 2 found at least one that Stage 1 missed + @test stage2_extra_found > stage1_only_found # Stage 2 rescues more than Stage 1 found +end + @testset "ECM generic integer types" begin # UInt128: widen(UInt128) == BigInt in base Julia — exercises the generic # _mulmod/_addmod/_submod helpers via the BigInt widen path.