From 777c4e20eae22de99cdb3e2217d667852c65ab5c Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:26:14 +0200 Subject: [PATCH 01/11] Add unsafe version of `foreach_neighbor` and `foreach_point_neighbor` --- src/PointNeighbors.jl | 3 +- src/neighborhood_search.jl | 121 ++++++++++++++++++++++++++++++++++--- src/nhs_grid.jl | 15 +++-- src/nhs_precomputed.jl | 20 +++--- 4 files changed, 134 insertions(+), 25 deletions(-) diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index 702c19e4..58b38687 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -20,7 +20,8 @@ include("nhs_grid.jl") include("nhs_precomputed.jl") include("gpu.jl") -export foreach_point_neighbor, foreach_neighbor +export foreach_point_neighbor, foreach_point_neighbor_unsafe, + foreach_neighbor, foreach_neighbor_unsafe export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch export DictionaryCellList, FullGridCellList, SpatialHashingCellList export DynamicVectorOfVectors diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index 5e99b9cd..a7f23916 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -178,7 +178,7 @@ Note that `system_coords` and `neighbor_coords` can be identical. automatically based on the type of `system_coords`. See [`@threaded`](@ref) for a list of available backends. -See also [`initialize!`](@ref), [`update!`](@ref). +See also [`foreach_point_neighbor_unsafe`](@ref), [`initialize!`](@ref), [`update!`](@ref). """ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborhood_search; parallelization_backend::ParallelizationBackend = default_backend(system_coords), @@ -200,6 +200,53 @@ function foreach_point_neighbor(f::T, system_coords, neighbor_coords, neighborho return nothing end +""" + foreach_point_neighbor_unsafe(f, system_coords, neighbor_coords, neighborhood_search; + parallelization_backend = default_backend(system_coords), + points = axes(system_coords, 2)) + +Like [`foreach_point_neighbor`](@ref), but skips **all** bounds checks inside the +threaded loop or GPU kernel. +See [`foreach_neighbor_unsafe`](@ref) for more details on which bounds checks are skipped. + +!!! warning + The `neighborhood_search` must have been initialized or updated with `system_coords` + as first coordinate array and `neighbor_coords` as second coordinate array. + This can be skipped for certain implementations. See [`requires_update`](@ref). + +!!! warning + Use this only when `neighborhood_search` is known to be initialized correctly for + `system_coords` and `neighbor_system_coords`. +""" +function foreach_point_neighbor_unsafe(f::T, system_coords, neighbor_coords, neighborhood_search; + parallelization_backend::ParallelizationBackend = default_backend(system_coords), + points = axes(system_coords, 2)) where {T} + # Explicit bounds check before the hot loop (or GPU kernel) + @boundscheck checkbounds(system_coords, ndims(neighborhood_search), points) + + @threaded parallelization_backend for point in points + # Now we can safely assume that `point` is inbounds + @inbounds foreach_neighbor(f, system_coords, neighbor_coords, + neighborhood_search, point) + end + + return nothing +end + +""" + foreach_neighbor(f, system_coords, neighbor_system_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + search_radius = search_radius(neighborhood_search)) + +Loop over all neighbors of `point` and execute `f(i, j, pos_diff, d)` for every +neighbor within `search_radius`, where `i` is `point`, `j` is the neighbor index, +`pos_diff` is the vector from neighbor to point, and `d` is the distance. + +This method performs bounds checks, even when called with `@inbounds`. +`@inbounds` only skips the bounds check for loading the coordinates of `point` +from `system_coords`. +See [`foreach_neighbor_unsafe`](@ref) for a version that skips all bounds checks. +""" @propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_system_coords, neighborhood_search::AbstractNeighborhoodSearch, point; @@ -207,7 +254,7 @@ end # Due to https://github.com/JuliaLang/julia/issues/30411, we cannot just remove # a `@boundscheck` by calling this function with `@inbounds` because it has a kwarg. # We have to use `@propagate_inbounds`, which will also remove boundschecks - # in the neighbor loop, which is not safe (see comment below). + # in the neighbor loop, which is not safe (that's what `foreach_neighbor_unsafe` is for). # To avoid this, we have to use a function barrier to disable the `@inbounds` again. point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point) @@ -215,26 +262,80 @@ end point, point_coords, search_radius) end -# This is the generic function that is called for `TrivialNeighborhoodSearch`. -# For `GridNeighborhoodSearch`, a specialized function is used for slightly better -# performance. `PrecomputedNeighborhoodSearch` can skip the distance check altogether. +# This is a function barrier to prevent the `@inbounds` in `foreach_neighbor` +# from propagating into the neighbor loop, which is not safe. @inline function foreach_neighbor(f, neighbor_system_coords, neighborhood_search::AbstractNeighborhoodSearch, point, point_coords, search_radius) + foreach_neighbor_inner(f, neighbor_system_coords, neighborhood_search, + point, point_coords, search_radius) +end + +""" + foreach_neighbor_unsafe(f, system_coords, neighbor_system_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + search_radius = search_radius(neighborhood_search)) + +Like [`foreach_neighbor`](@ref), but skips **all** bounds checks. + +`foreach_neighbor` performs the following bounds checks that are skipped here: +- Check if `point` is in bounds of `system_coords`. This is the only bounds check + that is skipped when calling `foreach_neighbor` with `@inbounds`, and the only one that + is safe to skip when `point` is guaranteed to be in bounds of `system_coords`. +- Check if the neighbors of `point` are in bounds of `neighbor_system_coords`. + This is not safe to skip when the neighborhood search was not initialized correctly; + for example when initialized with coordinate arrays of different sizes than the ones + passed to this function or when the internal data structures have been tampered with. + In this case, the cell lists (for [`GridNeighborhoodSearch`](@ref)) or neighbor lists + (for [`PrecomputedNeighborhoodSearch`](@ref)) might contain indices that are out of + bounds for `neighbor_system_coords`. +- With `GridNeighborhoodSearch` and [`FullGridCellList`](@ref), check if the neighboring + cells are in bounds of the grid. Again, this is not safe to skip when the neighborhood + search might not have been initialized correctly. +- With `PrecomputedNeighborhoodSearch`, check if `point` is in bounds of the neighbor lists. + Again, this is not safe to skip when the neighborhood search was not initialized correctly. + +Note that all these bounds checks are safe to skip if +- `point` is guaranteed to be in bounds of `system_coords`, +- the neighborhood search was initialized correctly with `system_coords` + and `neighbor_system_coords` and has not been tampered with since then. + +!!! warning + Use this only when `point` is known to be in bounds of `system_coords` + and when `neighborhood_search` is known to be initialized correctly for + `system_coords` and `neighbor_system_coords`. +""" +@inline function foreach_neighbor_unsafe(f, system_coords, neighbor_system_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point; + search_radius = search_radius(neighborhood_search)) + point_coords = @inbounds extract_svector(system_coords, Val(ndims(neighborhood_search)), + point) + + @inbounds foreach_neighbor_inner(f, neighbor_system_coords, neighborhood_search, + point, point_coords, search_radius) +end + +# This is the generic function that is called for `TrivialNeighborhoodSearch`. +# For `GridNeighborhoodSearch`, a specialized function is used for slightly better +# performance. `PrecomputedNeighborhoodSearch` can skip the distance check altogether. +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_system_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point, point_coords, search_radius) (; periodic_box) = neighborhood_search for neighbor in eachneighbor(point_coords, neighborhood_search) - # Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100. - # But we don't know if `neighbor` (extracted from the cell list) is in bounds. neighbor_coords = extract_svector(neighbor_system_coords, Val(ndims(neighborhood_search)), neighbor) pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, search_radius, - periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, search_radius, + periodic_box) if distance2 <= search_radius^2 distance = sqrt(distance2) diff --git a/src/nhs_grid.jl b/src/nhs_grid.jl index 21759ab2..32953c8e 100644 --- a/src/nhs_grid.jl +++ b/src/nhs_grid.jl @@ -508,14 +508,19 @@ end # Specialized version of the function in `neighborhood_search.jl`, which is faster # than looping over `eachneighbor`. -@inline function foreach_neighbor(f, neighbor_system_coords, - neighborhood_search::GridNeighborhoodSearch, - point, point_coords, search_radius) +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_system_coords, + neighborhood_search::GridNeighborhoodSearch, + point, point_coords, search_radius) (; cell_list, periodic_box) = neighborhood_search cell = cell_coords(point_coords, neighborhood_search) for neighbor_cell_ in neighboring_cells(cell, neighborhood_search) neighbor_cell = Tuple(neighbor_cell_) + + # Making the following `@inbounds` is not safe because we don't know if + # `neighbor_cell` is in bounds of the grid. neighbors = points_in_cell(neighbor_cell, neighborhood_search) # Boolean to indicate if this cell has a collision (only with `SpatialHashingCellList`) @@ -525,8 +530,8 @@ end for neighbor_ in eachindex(neighbors) neighbor = @inbounds neighbors[neighbor_] - # Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100. - # But we don't know if `neighbor` (extracted from the cell list) is in bounds. + # Making the following `@inbounds` is not safe because we don't know + # if `neighbor` (extracted from the cell list) is in bounds. neighbor_coords = extract_svector(neighbor_system_coords, Val(ndims(neighborhood_search)), neighbor) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 187ff0c5..3495b198 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -200,22 +200,24 @@ function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, end end -@inline function foreach_neighbor(f, neighbor_system_coords, - neighborhood_search::PrecomputedNeighborhoodSearch, - point, point_coords, search_radius) +# Note that calling this function with `@inbounds` is not safe. +# See the comments in `foreach_neighbor_unsafe`. +@propagate_inbounds function foreach_neighbor_unsafe(f, neighbor_system_coords, + neighborhood_search::PrecomputedNeighborhoodSearch, + point, point_coords, search_radius) (; periodic_box, neighbor_lists) = neighborhood_search - neighbors = @inbounds neighbor_lists[point] + # Making the following `@inbounds` is not safe because the neighbor list + # might not contain `point` if the NHS was not initialized correctly. + neighbors = neighbor_lists[point] for neighbor_ in eachindex(neighbors) neighbor = @inbounds neighbors[neighbor_] - # Making this `@inbounds` is not perfectly safe because + # Making this `@inbounds` is not safe because # `neighbor` (extracted from the neighbor list) is only guaranteed to be in bounds # if the neighbor lists were constructed correctly and have not been corrupted. - # However, adding this `@inbounds` yields a ~20% speedup for TLSPH on GPUs (A4500). - neighbor_coords = @inbounds extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), - neighbor) + neighbor_coords = extract_svector(neighbor_system_coords, + Val(ndims(neighborhood_search)), neighbor) pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) distance2 = dot(pos_diff, pos_diff) From 753958f7e9fed304fb4aef30919631da11b92e6d Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:58:10 +0200 Subject: [PATCH 02/11] Fix --- src/neighborhood_search.jl | 43 +++++++++++++++++++------------------- src/nhs_grid.jl | 18 +++++++++------- src/nhs_precomputed.jl | 19 +++++++++-------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index a7f23916..cf02774e 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -216,25 +216,25 @@ See [`foreach_neighbor_unsafe`](@ref) for more details on which bounds checks ar !!! warning Use this only when `neighborhood_search` is known to be initialized correctly for - `system_coords` and `neighbor_system_coords`. + `system_coords` and `neighbor_coords`. """ -function foreach_point_neighbor_unsafe(f::T, system_coords, neighbor_coords, neighborhood_search; +function foreach_point_neighbor_unsafe(f::T, system_coords, neighbor_coords, + neighborhood_search; parallelization_backend::ParallelizationBackend = default_backend(system_coords), points = axes(system_coords, 2)) where {T} # Explicit bounds check before the hot loop (or GPU kernel) @boundscheck checkbounds(system_coords, ndims(neighborhood_search), points) @threaded parallelization_backend for point in points - # Now we can safely assume that `point` is inbounds - @inbounds foreach_neighbor(f, system_coords, neighbor_coords, - neighborhood_search, point) + foreach_neighbor_unsafe(f, system_coords, neighbor_coords, + neighborhood_search, point) end return nothing end """ - foreach_neighbor(f, system_coords, neighbor_system_coords, + foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point; search_radius = search_radius(neighborhood_search)) @@ -247,7 +247,7 @@ This method performs bounds checks, even when called with `@inbounds`. from `system_coords`. See [`foreach_neighbor_unsafe`](@ref) for a version that skips all bounds checks. """ -@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_system_coords, +@propagate_inbounds function foreach_neighbor(f, system_coords, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point; search_radius = search_radius(neighborhood_search)) @@ -258,21 +258,21 @@ See [`foreach_neighbor_unsafe`](@ref) for a version that skips all bounds checks # To avoid this, we have to use a function barrier to disable the `@inbounds` again. point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point) - foreach_neighbor(f, neighbor_system_coords, neighborhood_search, + foreach_neighbor(f, neighbor_coords, neighborhood_search, point, point_coords, search_radius) end # This is a function barrier to prevent the `@inbounds` in `foreach_neighbor` # from propagating into the neighbor loop, which is not safe. -@inline function foreach_neighbor(f, neighbor_system_coords, +@inline function foreach_neighbor(f, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point, point_coords, search_radius) - foreach_neighbor_inner(f, neighbor_system_coords, neighborhood_search, + foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, point, point_coords, search_radius) end """ - foreach_neighbor_unsafe(f, system_coords, neighbor_system_coords, + foreach_neighbor_unsafe(f, system_coords, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point; search_radius = search_radius(neighborhood_search)) @@ -282,13 +282,13 @@ Like [`foreach_neighbor`](@ref), but skips **all** bounds checks. - Check if `point` is in bounds of `system_coords`. This is the only bounds check that is skipped when calling `foreach_neighbor` with `@inbounds`, and the only one that is safe to skip when `point` is guaranteed to be in bounds of `system_coords`. -- Check if the neighbors of `point` are in bounds of `neighbor_system_coords`. +- Check if the neighbors of `point` are in bounds of `neighbor_coords`. This is not safe to skip when the neighborhood search was not initialized correctly; for example when initialized with coordinate arrays of different sizes than the ones passed to this function or when the internal data structures have been tampered with. In this case, the cell lists (for [`GridNeighborhoodSearch`](@ref)) or neighbor lists (for [`PrecomputedNeighborhoodSearch`](@ref)) might contain indices that are out of - bounds for `neighbor_system_coords`. + bounds for `neighbor_coords`. - With `GridNeighborhoodSearch` and [`FullGridCellList`](@ref), check if the neighboring cells are in bounds of the grid. Again, this is not safe to skip when the neighborhood search might not have been initialized correctly. @@ -298,21 +298,21 @@ Like [`foreach_neighbor`](@ref), but skips **all** bounds checks. Note that all these bounds checks are safe to skip if - `point` is guaranteed to be in bounds of `system_coords`, - the neighborhood search was initialized correctly with `system_coords` - and `neighbor_system_coords` and has not been tampered with since then. + and `neighbor_coords` and has not been tampered with since then. !!! warning Use this only when `point` is known to be in bounds of `system_coords` and when `neighborhood_search` is known to be initialized correctly for - `system_coords` and `neighbor_system_coords`. + `system_coords` and `neighbor_coords`. """ -@inline function foreach_neighbor_unsafe(f, system_coords, neighbor_system_coords, +@inline function foreach_neighbor_unsafe(f, system_coords, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point; search_radius = search_radius(neighborhood_search)) point_coords = @inbounds extract_svector(system_coords, Val(ndims(neighborhood_search)), point) - @inbounds foreach_neighbor_inner(f, neighbor_system_coords, neighborhood_search, + @inbounds foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, point, point_coords, search_radius) end @@ -321,16 +321,17 @@ end # performance. `PrecomputedNeighborhoodSearch` can skip the distance check altogether. # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_inner(f, neighbor_system_coords, +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point, point_coords, search_radius) (; periodic_box) = neighborhood_search for neighbor in eachneighbor(point_coords, neighborhood_search) - neighbor_coords = extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), neighbor) + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) (pos_diff, diff --git a/src/nhs_grid.jl b/src/nhs_grid.jl index 32953c8e..8e3c1ff2 100644 --- a/src/nhs_grid.jl +++ b/src/nhs_grid.jl @@ -510,7 +510,7 @@ end # than looping over `eachneighbor`. # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_inner(f, neighbor_system_coords, +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, neighborhood_search::GridNeighborhoodSearch, point, point_coords, search_radius) (; cell_list, periodic_box) = neighborhood_search @@ -532,15 +532,17 @@ end # Making the following `@inbounds` is not safe because we don't know # if `neighbor` (extracted from the cell list) is in bounds. - neighbor_coords = extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), neighbor) + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), + neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, - search_radius, periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, + search_radius, periodic_box) if distance2 <= search_radius^2 distance = sqrt(distance2) @@ -548,7 +550,7 @@ end # If this cell has a collision, check if this point belongs to this cell # (only with `SpatialHashingCellList`). if cell_collision && - check_collision(neighbor_cell_, neighbor_coords, cell_list, + check_collision(neighbor_cell_, neighbor_point_coords, cell_list, neighborhood_search) continue end diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 3495b198..c7ddceca 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -202,9 +202,9 @@ end # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_unsafe(f, neighbor_system_coords, - neighborhood_search::PrecomputedNeighborhoodSearch, - point, point_coords, search_radius) +@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, + neighborhood_search::PrecomputedNeighborhoodSearch, + point, point_coords, search_radius) (; periodic_box, neighbor_lists) = neighborhood_search # Making the following `@inbounds` is not safe because the neighbor list @@ -216,15 +216,16 @@ end # Making this `@inbounds` is not safe because # `neighbor` (extracted from the neighbor list) is only guaranteed to be in bounds # if the neighbor lists were constructed correctly and have not been corrupted. - neighbor_coords = extract_svector(neighbor_system_coords, - Val(ndims(neighborhood_search)), neighbor) + neighbor_point_coords = extract_svector(neighbor_coords, + Val(ndims(neighborhood_search)), neighbor) - pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords) + pos_diff = convert.(eltype(neighborhood_search), + point_coords - neighbor_point_coords) distance2 = dot(pos_diff, pos_diff) - pos_diff, - distance2 = compute_periodic_distance(pos_diff, distance2, search_radius, - periodic_box) + (pos_diff, + distance2) = compute_periodic_distance(pos_diff, distance2, search_radius, + periodic_box) distance = sqrt(distance2) From be87e63034236da9d961700be02775cf708e88ec Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:58:32 +0200 Subject: [PATCH 03/11] Add tests for unsafe functions --- test/neighborhood_search.jl | 61 +++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 65d3ed85..3820e11d 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -254,7 +254,7 @@ names_copied = [name * " copied" for name in names] append!(names, names_copied) - @testset "$(names[i])" for i in eachindex(names) + @testset verbose=true "$(names[i])" for i in eachindex(names) nhs = neighborhood_searches[i] # Initialize with `seed = 1` @@ -267,17 +267,60 @@ update!(nhs, coords, coords) end - neighbors = [Int[] for _ in axes(coords, 2)] + # Test the regular `foreach_point_neighbor` + @testset "`foreach_point_neighbor`" begin + neighbors = [Int[] for _ in axes(coords, 2)] + foreach_point_neighbor(coords, coords, nhs, + parallelization_backend = SerialBackend()) do point, + neighbor, + pos_diff, + distance + append!(neighbors[point], neighbor) + end + + @test sort.(neighbors) == neighbors_expected + end - foreach_point_neighbor(coords, coords, nhs, - parallelization_backend = SerialBackend()) do point, - neighbor, - pos_diff, - distance - append!(neighbors[point], neighbor) + # Test manual loop with `foreach_neighbor` + @testset "Manual Loop with `foreach_neighbor`" begin + neighbors_manual = [Int[] for _ in axes(coords, 2)] + for point in axes(coords, 2) + foreach_neighbor(coords, coords, nhs, + point) do point, neighbor, pos_diff, distance + append!(neighbors_manual[point], neighbor) + end + end + + @test sort.(neighbors_manual) == neighbors_expected end - @test sort.(neighbors) == neighbors_expected + # Repeat with foreach_point_neighbor_unsafe + @testset "`foreach_point_neighbor_unsafe`" begin + neighbors_unsafe = [Int[] for _ in axes(coords, 2)] + foreach_point_neighbor_unsafe(coords, coords, nhs, + parallelization_backend = SerialBackend()) do point, + neighbor, + pos_diff, + distance + append!(neighbors_unsafe[point], neighbor) + end + + @test sort.(neighbors_unsafe) == neighbors_expected + end + + # Repeat with manual loop with `foreach_neighbor_unsafe` + @testset "Manual Loop with `foreach_neighbor_unsafe`" begin + neighbors_manual_unsafe = [Int[] for _ in axes(coords, 2)] + for point in axes(coords, 2) + foreach_neighbor_unsafe(coords, coords, nhs, + point) do point, neighbor, + pos_diff, distance + append!(neighbors_manual_unsafe[point], neighbor) + end + end + + @test sort.(neighbors_manual_unsafe) == neighbors_expected + end end end end From 149da3367698bac13d6e8c0b575ff66bf1e905bd Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:02:41 +0200 Subject: [PATCH 04/11] Include unused tests for spatial hashing --- test/cell_lists/spatial_hashing.jl | 30 +++++++++++++++++------------- test/unittest.jl | 1 + 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/test/cell_lists/spatial_hashing.jl b/test/cell_lists/spatial_hashing.jl index 28d13b12..6a8e8104 100644 --- a/test/cell_lists/spatial_hashing.jl +++ b/test/cell_lists/spatial_hashing.jl @@ -71,9 +71,11 @@ @testset "Cell Coordinates Hash Function" begin # 1D coordinates - @test coordinates_flattened([1]) == UInt128(reinterpret(UInt32, Int32(1))) - @test coordinates_flattened([-1]) == UInt128(reinterpret(UInt32, Int32(-1))) - @test coordinates_flattened([0]) == Int128(0) + @test PointNeighbors.coordinates_flattened([1]) == + UInt128(reinterpret(UInt32, Int32(1))) + @test PointNeighbors.coordinates_flattened([-1]) == + UInt128(reinterpret(UInt32, Int32(-1))) + @test PointNeighbors.coordinates_flattened([0]) == Int128(0) # 2D coordinates coord2 = [-1, 1] @@ -82,18 +84,20 @@ # The second coordinate gives 1 shifted by 32 bits, so 1 * 2^32. expected2 = UInt128(2^32 - 1 + 2^32) - @test coordinates_flattened(coord2) == expected2 + @test PointNeighbors.coordinates_flattened(coord2) == expected2 # 3D coordinates coord3 = [1, 0, -1] expected3 = UInt128(1 + 0 * 2^32 + (2^32 - 1) * Int128(2)^64) - @test coordinates_flattened(coord3) == expected3 + @test PointNeighbors.coordinates_flattened(coord3) == expected3 # Extreme Int32 bounds max_val = Int32(typemax(Int32)) min_val = Int32(typemin(Int32)) - @test coordinates_flattened((max_val,)) == UInt128(reinterpret(UInt32, max_val)) - @test coordinates_flattened((min_val,)) == UInt128(reinterpret(UInt32, min_val)) + @test PointNeighbors.coordinates_flattened((max_val,)) == + UInt128(reinterpret(UInt32, max_val)) + @test PointNeighbors.coordinates_flattened((min_val,)) == + UInt128(reinterpret(UInt32, min_val)) # 3D extreme Int32 bounds coord4 = [min_val, max_val, Int32(0)] @@ -102,19 +106,19 @@ # `typemax(Int32)` gives the unsigned value 2^31 - 1. expected4 = UInt128(2^31 + (2^31 - 1) * 2^32) - @test coordinates_flattened(coord4) == expected4 + @test PointNeighbors.coordinates_flattened(coord4) == expected4 # Passing non-Int32-coercible should error large_val = typemax(Int32) + 1 - @test_throws InexactError coordinates_flattened([large_val]) + @test_throws InexactError PointNeighbors.coordinates_flattened([large_val]) small_val = typemin(Int32) - 1 - @test_throws InexactError coordinates_flattened([small_val]) + @test_throws InexactError PointNeighbors.coordinates_flattened([small_val]) - @test_throws InexactError coordinates_flattened([Inf]) - @test_throws InexactError coordinates_flattened([NaN]) + @test_throws InexactError PointNeighbors.coordinates_flattened([Inf]) + @test_throws InexactError PointNeighbors.coordinates_flattened([NaN]) # Too many dimensions should throw assertion error - @test_throws AssertionError coordinates_flattened([1, 2, 3, 4]) + @test_throws AssertionError PointNeighbors.coordinates_flattened([1, 2, 3, 4]) end end diff --git a/test/unittest.jl b/test/unittest.jl index f0981614..262b1753 100644 --- a/test/unittest.jl +++ b/test/unittest.jl @@ -7,4 +7,5 @@ include("nhs_precomputed.jl") include("neighborhood_search.jl") include("cell_lists/full_grid.jl") + include("cell_lists/spatial_hashing.jl") end; From e3194a6d815309f21cdff4331f608190126c5a35 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:11:38 +0200 Subject: [PATCH 05/11] Bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d4654124..32e1fdf5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PointNeighbors" uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c" authors = ["Erik Faulhaber "] -version = "0.6.6-dev" +version = "0.6.6" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From f11877cef9d03edd2b969cd9565af214654f084a Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Wed, 1 Apr 2026 12:51:29 +0200 Subject: [PATCH 06/11] Rewrite benchmarking code --- benchmarks/run_benchmarks.jl | 46 ++++++---- benchmarks/smoothed_particle_hydrodynamics.jl | 89 ++++++++----------- test/point_cloud.jl | 25 +++++- 3 files changed, 91 insertions(+), 69 deletions(-) diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index ffabc805..062e6bbe 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3, ``` """ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches; + search_radius_factor = 3.0, parallelization_backend = PolyesterBackend(), names = ["Neighborhood search $i" for i in 1:length(neighborhood_searches)]', - seed = 1, perturbation_factor_position = 1.0) + seed = 1, perturbation_factor_position = 1.0, shuffle = false) # Multiply number of points in each iteration (roughly) by this factor scaling_factor = 4 per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension)) @@ -64,24 +65,28 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho times = zeros(iterations, length(neighborhood_searches)) for iter in 1:iterations - coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position) + coordinates_ = point_cloud(sizes[iter]; seed, perturbation_factor_position, shuffle) + coordinates = convert.(typeof(search_radius_factor), coordinates_) domain_size = maximum(sizes[iter]) + 1 # Normalize domain size to 1 coordinates ./= domain_size # Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively - search_radius = 4.0f0 / domain_size + search_radius = search_radius_factor / domain_size n_particles = size(coordinates, 2) neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches, search_radius, n_particles) for i in eachindex(neighborhood_searches_copy) - neighborhood_search = neighborhood_searches_copy[i] - PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates) + neighborhood_search_ = neighborhood_searches_copy[i] + neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend, + neighborhood_search_) + coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates) + PointNeighbors.initialize!(neighborhood_search, coords, coords) - time = benchmark(neighborhood_search, coordinates; parallelization_backend) + time = benchmark(neighborhood_search, coords; parallelization_backend) times[iter, i] = time time_string = BenchmarkTools.prettytime(time * 1e9) time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles) @@ -170,23 +175,30 @@ include("benchmarks/benchmarks.jl") run_benchmark_gpu(benchmark_n_body, (10, 10), 3) ``` """ -function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...) +function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; + parallelization_backend=PolyesterBackend(), kwargs...) NDIMS = length(n_points_per_dimension) min_corner = 0.0f0 .* n_points_per_dimension max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) - neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, - cell_list = FullGridCellList(; - search_radius = 0.0f0, - min_corner, - max_corner)) - PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)] - - names = ["GridNeighborhoodSearch with FullGridCellList";; - "PrecomputedNeighborhoodSearch"] + cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner) + grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list, + update_strategy = ParallelUpdate()) + transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU + neighborhood_searches = [ + grid_nhs + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, + update_neighborhood_search = grid_nhs, + transpose_backend) + ] + + names = [ + "GridNeighborhoodSearch with FullGridCellList";; + "PrecomputedNeighborhoodSearch" + ] run_benchmark(benchmark, n_points_per_dimension, iterations, - neighborhood_searches; names, kwargs...) + neighborhood_searches; names, parallelization_backend, kwargs...) end """ diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 7286baca..bfb760ac 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -1,4 +1,5 @@ using PointNeighbors +using PointNeighbors.Adapt using TrixiParticles using BenchmarkTools @@ -43,47 +44,25 @@ This method is used to simulate an incompressible fluid. """ function benchmark_wcsph(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates)) - density = 1000.0 - particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3 - fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing) - - sound_speed = 10.0 - state_equation = StateEquationCole(; sound_speed, reference_density = density, - exponent = 1) - - viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0) - density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1) - - __benchmark_wcsph_inner(neighborhood_search, fluid, state_equation, - viscosity, density_diffusion, parallelization_backend) -end - -""" - benchmark_wcsph_fp32(neighborhood_search, coordinates; - parallelization_backend = default_backend(coordinates)) + # System initialization has to happen on the CPU + coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates) -Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers. -""" -function benchmark_wcsph_fp32(neighborhood_search, coordinates_; - parallelization_backend = default_backend(coordinates_)) - coordinates = convert(Matrix{Float32}, coordinates_) - density = 1000.0f0 + search_radius = PointNeighbors.search_radius(neighborhood_search) + ELTYPE = typeof(search_radius) + density = convert(ELTYPE, 1000.0) particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3 - fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing) + fluid = InitialCondition(; coordinates = coordinates_cpu, density, + mass = convert(ELTYPE, 0.1) * particle_spacing, + particle_spacing) - sound_speed = 10.0f0 + sound_speed = convert(ELTYPE, 10.0) state_equation = StateEquationCole(; sound_speed, reference_density = density, exponent = 1) - viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0) - density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0) - - __benchmark_wcsph_inner(neighborhood_search, fluid, state_equation, - viscosity, density_diffusion, parallelization_backend) -end + viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02), + beta = convert(ELTYPE, 0.0)) + density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1)) -function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation, - viscosity, density_diffusion, parallelization_backend) # Compact support == 2 * smoothing length for these kernels smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2 if ndims(neighborhood_search) == 1 @@ -92,23 +71,21 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}() end - fluid_system = WeaklyCompressibleSPHSystem(initial_condition, ContinuityDensity(), + fluid_system = WeaklyCompressibleSPHSystem(fluid, ContinuityDensity(), state_equation, smoothing_kernel, smoothing_length, viscosity = viscosity, density_diffusion = density_diffusion) - system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system) + system = Adapt.adapt(parallelization_backend, fluid_system) # Remove unnecessary data structures that are only used for initialization - neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) semi = DummySemidiscretization(nhs, parallelization_backend, true) - v = PointNeighbors.Adapt.adapt(parallelization_backend, - vcat(initial_condition.velocity, - initial_condition.density')) - u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates) + v = Adapt.adapt(parallelization_backend, + vcat(fluid.velocity, fluid.density')) + u = Adapt.adapt(parallelization_backend, fluid.coordinates) dv = zero(v) # Initialize the system @@ -128,8 +105,15 @@ This method is used to simulate an elastic structure. """ function benchmark_tlsph(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates)) - material = (density = 1000.0, E = 1.4e6, nu = 0.4) - solid = InitialCondition(; coordinates, density = material.density, mass = 0.1) + # System initialization has to happen on the CPU + coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates) + + search_radius = PointNeighbors.search_radius(neighborhood_search) + ELTYPE = typeof(search_radius) + material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6), + nu = convert(ELTYPE, 0.4)) + solid = InitialCondition(; coordinates = coordinates_cpu, + density = material.density, mass = convert(ELTYPE, 0.1)) # Compact support == 2 * smoothing length for these kernels smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2 @@ -142,15 +126,20 @@ function benchmark_tlsph(neighborhood_search, coordinates; solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length, material.E, material.nu) - semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true) + system_ = Adapt.adapt(parallelization_backend, solid_system) - v = copy(solid.velocity) - u = copy(solid.coordinates) + # Remove unnecessary data structures that are only used for initialization + nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + system = TrixiParticles.@set system_.self_interaction_nhs = nhs + + semi = DummySemidiscretization(nhs, parallelization_backend, true) + + v = Adapt.adapt(parallelization_backend, copy(solid.velocity)) + u = Adapt.adapt(parallelization_backend, copy(solid.coordinates)) dv = zero(v) # Initialize the system - TrixiParticles.initialize!(solid_system, semi) + TrixiParticles.initialize!(system, semi) - return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, - $solid_system, $solid_system, $semi) + return @belapsed TrixiParticles.interact_structure_structure2!($dv, $v, $system, $semi) end diff --git a/test/point_cloud.jl b/test/point_cloud.jl index ba99b2e1..56a280de 100644 --- a/test/point_cloud.jl +++ b/test/point_cloud.jl @@ -2,7 +2,7 @@ using Random # Generate a rectangular point cloud, optionally with a perturbation in the point positions function point_cloud(n_points_per_dimension; - seed = 1, perturbation_factor_position = 1.0) + seed = 1, perturbation_factor_position = 1.0, shuffle = false) # Fixed seed to ensure reproducibility Random.seed!(seed) @@ -10,8 +10,15 @@ function point_cloud(n_points_per_dimension; coordinates = Array{Float64}(undef, n_dims, prod(n_points_per_dimension)) cartesian_indices = CartesianIndices(n_points_per_dimension) + # Extra data structures for the sorting code below + cell_coords = Vector{SVector{n_dims, Int}}(undef, size(coordinates, 2)) + cell_size = ntuple(dim -> 4.0, n_dims) + for i in axes(coordinates, 2) - coordinates[:, i] .= Tuple(cartesian_indices[i]) + point_coords = SVector(Tuple(cartesian_indices[i])) + coordinates[:, i] .= point_coords + cell_coords[i] = PointNeighbors.cell_coords(point_coords, nothing, nothing, + cell_size) .+ 1 end # A standard deviation of 0.05 in the particle coordinates @@ -21,6 +28,20 @@ function point_cloud(n_points_per_dimension; # The benchmark results are also consistent with the timer output of the simulation. perturb!(coordinates, perturbation_factor_position * 0.05) + # Sort by Z index (with `using Morton`) + # permutation = sortperm(cell_coords, by = c -> cartesian2morton(c)) + + # Sort by linear cell index + # permutation = sortperm(cell_coords) + # coordinates .= coordinates[:, permutation] + + if shuffle + # Sort randomly + permutation = shuffle(axes(coordinates, 2)) + + coordinates .= coordinates[:, permutation] + end + return coordinates end From dd890ebe44dafd19978f8168456863f40b8bb3f0 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 7 Apr 2026 19:28:12 +0200 Subject: [PATCH 07/11] Use random density for WCSPH benchmark --- benchmarks/smoothed_particle_hydrodynamics.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index bfb760ac..88e38508 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -55,6 +55,11 @@ function benchmark_wcsph(neighborhood_search, coordinates; mass = convert(ELTYPE, 0.1) * particle_spacing, particle_spacing) + # Make sure that the computed forces are not all zero + for i in eachindex(fluid.density) + fluid.density[i] += rand(eltype(fluid.density)) + end + sound_speed = convert(ELTYPE, 10.0) state_equation = StateEquationCole(; sound_speed, reference_density = density, exponent = 1) From 6ecd287092ab2d70cdac9f2d32700ff5d4b630d7 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 7 Apr 2026 19:28:28 +0200 Subject: [PATCH 08/11] Sort point cloud for benchmarking --- test/neighborhood_search.jl | 6 +++--- test/point_cloud.jl | 24 ++++++++++++++---------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 3820e11d..ed1a80dd 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -142,14 +142,14 @@ "($(seed == 1 ? "`initialize!`" : "`update!`"))" @testset verbose=true "$(name(cloud_size, seed)))" for cloud_size in cloud_sizes, seed in seeds - coords = point_cloud(cloud_size, seed = seed) + search_radius = 2.5 + coords = point_cloud(cloud_size, search_radius, seed = seed) NDIMS = length(cloud_size) n_points = size(coords, 2) - search_radius = 2.5 # Use different coordinates for `initialize!` and then `update!` with the # correct coordinates to make sure that `update!` is working as well. - coords_initialize = point_cloud(cloud_size, seed = 1) + coords_initialize = point_cloud(cloud_size, search_radius, seed = 1) # Compute expected neighbor lists by brute-force looping over all points # as potential neighbors (`TrivialNeighborhoodSearch`). diff --git a/test/point_cloud.jl b/test/point_cloud.jl index 56a280de..64c41bec 100644 --- a/test/point_cloud.jl +++ b/test/point_cloud.jl @@ -1,8 +1,9 @@ using Random # Generate a rectangular point cloud, optionally with a perturbation in the point positions -function point_cloud(n_points_per_dimension; - seed = 1, perturbation_factor_position = 1.0, shuffle = false) +function point_cloud(n_points_per_dimension, search_radius; + seed = 1, perturbation_factor_position = 1.0, + sort = true, shuffle = false) # Fixed seed to ensure reproducibility Random.seed!(seed) @@ -12,7 +13,7 @@ function point_cloud(n_points_per_dimension; # Extra data structures for the sorting code below cell_coords = Vector{SVector{n_dims, Int}}(undef, size(coordinates, 2)) - cell_size = ntuple(dim -> 4.0, n_dims) + cell_size = ntuple(dim -> search_radius, n_dims) for i in axes(coordinates, 2) point_coords = SVector(Tuple(cartesian_indices[i])) @@ -28,17 +29,20 @@ function point_cloud(n_points_per_dimension; # The benchmark results are also consistent with the timer output of the simulation. perturb!(coordinates, perturbation_factor_position * 0.05) - # Sort by Z index (with `using Morton`) - # permutation = sortperm(cell_coords, by = c -> cartesian2morton(c)) - # Sort by linear cell index - # permutation = sortperm(cell_coords) - # coordinates .= coordinates[:, permutation] + if sort + if shuffle + throw(ArgumentError("cannot sort and shuffle at the same time")) + end + + # Sort by Z index (with `using Morton`) + # permutation = sortperm(cell_coords, by = c -> cartesian2morton(c)) - if shuffle + permutation = sortperm(cell_coords) + coordinates .= coordinates[:, permutation] + elseif shuffle # Sort randomly permutation = shuffle(axes(coordinates, 2)) - coordinates .= coordinates[:, permutation] end From 9bce3baab5f5c16a938a43d9cddc2b1e122a7063 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Thu, 9 Apr 2026 10:44:49 +0200 Subject: [PATCH 09/11] Pre-sort benchmark and add `run_benchmark_full_grid` --- benchmarks/run_benchmarks.jl | 49 ++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index 062e6bbe..ff08ebe3 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -65,7 +65,8 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho times = zeros(iterations, length(neighborhood_searches)) for iter in 1:iterations - coordinates_ = point_cloud(sizes[iter]; seed, perturbation_factor_position, shuffle) + coordinates_ = point_cloud(sizes[iter], search_radius_factor; + seed, perturbation_factor_position, shuffle) coordinates = convert.(typeof(search_radius_factor), coordinates_) domain_size = maximum(sizes[iter]) + 1 @@ -189,7 +190,7 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; grid_nhs PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, update_neighborhood_search = grid_nhs, - transpose_backend) + transpose_backend)#, max_neighbors=128) ] names = [ @@ -201,6 +202,50 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; neighborhood_searches; names, parallelization_backend, kwargs...) end +""" + run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...) + +Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a +`FullGridCellList`. This is the neighborhood search implementation that is used +in TrixiParticles.jl when performance is important. +Use this function to benchmark and profile TrixiParticles.jl kernels. + +# Arguments +- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref), + [`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref), + [`benchmark_wcsph_fp32`](@ref) and [`benchmark_tlsph`](@ref). +- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number + of points. For example, use `(100, 100)` for a 2D benchmark or + `(10, 10, 10)` for a 3D benchmark. +- `iterations`: Number of refinement iterations + +# Keywords +See [`run_benchmark`](@ref) for a list of available keywords. + +# Examples +```julia +include("benchmarks/benchmarks.jl") + +run_benchmark_full_grid(benchmark_n_body, (10, 10), 3) +``` +""" +function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; + parallelization_backend=PolyesterBackend(), kwargs...) + NDIMS = length(n_points_per_dimension) + + min_corner = 0.0f0 .* n_points_per_dimension + max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) + cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner) + grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list, + update_strategy = ParallelUpdate()) + neighborhood_searches = [grid_nhs] + + names = ["GridNeighborhoodSearch with FullGridCellList";;] + + run_benchmark(benchmark, n_points_per_dimension, iterations, + neighborhood_searches; names, parallelization_backend, kwargs...) +end + """ plot_benchmark(n_particles_vec, times; kwargs...) From 7f814ce864e6405999b4c97531110852dedf8fea Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:12:34 +0200 Subject: [PATCH 10/11] Update TLSPH benchmark --- benchmarks/smoothed_particle_hydrodynamics.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 88e38508..06e24301 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -117,8 +117,11 @@ function benchmark_tlsph(neighborhood_search, coordinates; ELTYPE = typeof(search_radius) material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6), nu = convert(ELTYPE, 0.4)) + + # The `particle_spacing` is only required for setting the type of the initial condition solid = InitialCondition(; coordinates = coordinates_cpu, - density = material.density, mass = convert(ELTYPE, 0.1)) + density = material.density, mass = convert(ELTYPE, 0.1), + particle_spacing = search_radius) # Compact support == 2 * smoothing length for these kernels smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2 @@ -129,8 +132,9 @@ function benchmark_tlsph(neighborhood_search, coordinates; smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}() end + penalty_force = PenaltyForceGanzenmueller(alpha = convert(ELTYPE, 0.1)) solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length, - material.E, material.nu) + material.E, material.nu; penalty_force) system_ = Adapt.adapt(parallelization_backend, solid_system) # Remove unnecessary data structures that are only used for initialization @@ -146,5 +150,5 @@ function benchmark_tlsph(neighborhood_search, coordinates; # Initialize the system TrixiParticles.initialize!(system, semi) - return @belapsed TrixiParticles.interact_structure_structure2!($dv, $v, $system, $semi) + return @belapsed TrixiParticles.interact_structure_structure!($dv, $v, $system, $semi) end From 49bfced5323a2fdd5d79967dde7d159248fe08ee Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 4 May 2026 09:42:04 +0200 Subject: [PATCH 11/11] Fix for TP 0.5 --- benchmarks/smoothed_particle_hydrodynamics.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 06e24301..3d35f47d 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -76,10 +76,9 @@ function benchmark_wcsph(neighborhood_search, coordinates; smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}() end - fluid_system = WeaklyCompressibleSPHSystem(fluid, ContinuityDensity(), + fluid_system = WeaklyCompressibleSPHSystem(fluid; density_calculator = ContinuityDensity(), state_equation, smoothing_kernel, - smoothing_length, viscosity = viscosity, - density_diffusion = density_diffusion) + smoothing_length, viscosity, density_diffusion) system = Adapt.adapt(parallelization_backend, fluid_system)