Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions benchmarks/bench_eth_eip4844_kzg.nim
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ proc benchBlobToKzgCommitment(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i

## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
let tp = Threadpool.new()
let numThreads = tp.numThreads

let startParallel = getMonotime()
block:
Expand All @@ -88,7 +89,7 @@ proc benchBlobToKzgCommitment(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
let perfParallel = inNanoseconds((stopParallel-startParallel) div iters)

let parallelSpeedup = float(perfSerial) / float(perfParallel)
echo &"Speedup ratio parallel {tp.numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo &"Speedup ratio parallel {numThreads} threads over serial: {parallelSpeedup:>6.3f}x"

proc benchComputeKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int) =

Expand All @@ -102,6 +103,7 @@ proc benchComputeKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int)

## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
let tp = Threadpool.new()
let numThreads = tp.numThreads

let startParallel = getMonotime()
block:
Expand All @@ -117,7 +119,7 @@ proc benchComputeKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int)
let perfParallel = inNanoseconds((stopParallel-startParallel) div iters)

let parallelSpeedup = float(perfSerial) / float(perfParallel)
echo &"Speedup ratio parallel {tp.numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo &"Speedup ratio parallel {numThreads} threads over serial: {parallelSpeedup:>6.3f}x"

proc benchComputeBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int) =

Expand All @@ -130,6 +132,7 @@ proc benchComputeBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i

## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
let tp = Threadpool.new()
let numThreads = tp.numThreads

let startParallel = getMonotime()
block:
Expand All @@ -144,7 +147,7 @@ proc benchComputeBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
let perfParallel = inNanoseconds((stopParallel-startParallel) div iters)

let parallelSpeedup = float(perfSerial) / float(perfParallel)
echo &"Speedup ratio parallel {tp.numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo &"Speedup ratio parallel {numThreads} threads over serial: {parallelSpeedup:>6.3f}x"

proc benchVerifyKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int) =

Expand All @@ -163,6 +166,7 @@ proc benchVerifyBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: in

## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
let tp = Threadpool.new()
let numThreads = tp.numThreads

let startParallel = getMonotime()
block:
Expand All @@ -176,7 +180,7 @@ proc benchVerifyBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: in
let perfParallel = inNanoseconds((stopParallel-startParallel) div iters)

let parallelSpeedup = float(perfSerial) / float(perfParallel)
echo &"Speedup ratio parallel {tp.numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo &"Speedup ratio parallel {numThreads} threads over serial: {parallelSpeedup:>6.3f}x"

proc benchVerifyBlobKzgProofBatch(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int) =

Expand All @@ -201,6 +205,7 @@ proc benchVerifyBlobKzgProofBatch(b: BenchSet, ctx: ptr EthereumKZGContext, iter

## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
let tp = Threadpool.new()
let numTHreads = tp.numThreads

let startParallel = getMonotime()
block:
Expand All @@ -220,7 +225,7 @@ proc benchVerifyBlobKzgProofBatch(b: BenchSet, ctx: ptr EthereumKZGContext, iter
let perfParallel = inNanoseconds((stopParallel-startParallel) div iters)

let parallelSpeedup = float(perfSerial) / float(perfParallel)
echo &"Speedup ratio parallel {tp.numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo &"Speedup ratio parallel {numThreads} threads over serial: {parallelSpeedup:>6.3f}x"
echo ""

i *= 2
Expand Down Expand Up @@ -258,7 +263,7 @@ proc main() =
echo ""
benchVerifyBlobKzgProofBatch(b, ctx, Iters)
separator()

ctx.trusted_setup_delete()

when isMainModule:
main()
32 changes: 16 additions & 16 deletions constantine/math/elliptic/ec_multi_scalar_mul.nim
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ func multiScalarMulImpl_reference_vartime[bits: static int, EC, ECaff](
const numBuckets = 1 shl c - 1 # bucket 0 is unused
const numWindows = bits.ceilDiv_vartime(c)

let miniMSMs = allocHeapArray(EC, numWindows)
let buckets = allocHeapArray(EC, numBuckets)
let miniMSMs = allocHeapArrayAligned(EC, numWindows, alignment = 64)
let buckets = allocHeapArrayAligned(EC, numBuckets, alignment = 64)

# Algorithm
# ---------
Expand Down Expand Up @@ -91,8 +91,8 @@ func multiScalarMulImpl_reference_vartime[bits: static int, EC, ECaff](

# Cleanup
# -------
buckets.freeHeap()
miniMSMs.freeHeap()
buckets.freeHeapAligned()
miniMSMs.freeHeapAligned()

func multiScalarMul_reference_dispatch_vartime[bits: static int, EC, ECaff](
r: var EC,
Expand Down Expand Up @@ -151,7 +151,7 @@ func multiScalarMul_reference_vartime*[F, EC, ECaff](
coefs_big.batchFromField(coefs, n)
r.multiScalarMul_reference_vartime(coefs_big, points, n)

freeHeapAligned(coefs_big)
coefs_big.freeHeapAligned()

func multiScalarMul_reference_vartime*[EC, ECaff](
r: var EC,
Expand Down Expand Up @@ -264,7 +264,7 @@ func msmImpl_vartime[bits: static int, EC, ECaff](
# -----
const numBuckets = 1 shl (c-1)

let buckets = allocHeapArray(EC, numBuckets)
let buckets = allocHeapArrayAligned(EC, numBuckets, alignment = 64)
for i in 0 ..< numBuckets:
buckets[i].setNeutral()

Expand Down Expand Up @@ -293,7 +293,7 @@ func msmImpl_vartime[bits: static int, EC, ECaff](

# Cleanup
# -------
buckets.freeHeap()
buckets.freeHeapAligned()

# Multi scalar multiplication with batched affine additions
# -----------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -357,8 +357,8 @@ func msmAffineImpl_vartime[bits: static int, EC, ECaff](
# Setup
# -----
const (numBuckets, queueLen) = c.deriveSchedulerConstants()
let buckets = allocHeap(Buckets[numBuckets, EC, ECaff])
let sched = allocHeap(Scheduler[numBuckets, queueLen, EC, ECaff])
let buckets = allocHeapAligned(Buckets[numBuckets, EC, ECaff], alignment = 64)
let sched = allocHeapAligned(Scheduler[numBuckets, queueLen, EC, ECaff], alignment = 64)
sched.init(points, buckets, 0, numBuckets.int32)

# Algorithm
Expand Down Expand Up @@ -389,8 +389,8 @@ func msmAffineImpl_vartime[bits: static int, EC, ECaff](

# Cleanup
# -------
sched.freeHeap()
buckets.freeHeap()
sched.freeHeapAligned()
buckets.freeHeapAligned()

# Endomorphism acceleration
# -----------------------------------------------------------------------------------------------------------------------
Expand All @@ -410,8 +410,8 @@ proc applyEndomorphism[bits: static int, ECaff](
else: ECaff.G

const L = ECaff.getScalarField().bits().computeEndoRecodedLength(M)
let splitCoefs = allocHeapArray(array[M, BigInt[L]], N)
let endoBasis = allocHeapArray(array[M, ECaff], N)
let splitCoefs = allocHeapArrayAligned(array[M, BigInt[L]], N, alignment = 64)
let endoBasis = allocHeapArrayAligned(array[M, ECaff], N, alignment = 64)

for i in 0 ..< N:
var negatePoints {.noinit.}: array[M, SecretBool]
Expand Down Expand Up @@ -448,8 +448,8 @@ template withEndo[coefsBits: static int, EC, ECaff](
# Given that bits and N changed, we are able to use a bigger `c`
# but it has no significant impact on performance
msmProc(r, endoCoefs, endoPoints, endoN, c)
freeHeap(endoCoefs)
freeHeap(endoPoints)
endoCoefs.freeHeapAligned()
endoPoints.freeHeapAligned()
else:
msmProc(r, coefs, points, N, c)

Expand Down Expand Up @@ -555,7 +555,7 @@ func multiScalarMul_vartime*[F, EC, ECaff](
coefs_big.batchFromField(coefs, n)
r.multiScalarMul_vartime(coefs_big, points, n)

freeHeapAligned(coefs_big)
coefs_big.freeHeapAligned()

func multiScalarMul_vartime*[EC, ECaff](
r: var EC,
Expand Down
Loading