diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index ffabc805..ff08ebe3 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3, ``` """ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches; + search_radius_factor = 3.0, parallelization_backend = PolyesterBackend(), names = ["Neighborhood search $i" for i in 1:length(neighborhood_searches)]', - seed = 1, perturbation_factor_position = 1.0) + seed = 1, perturbation_factor_position = 1.0, shuffle = false) # Multiply number of points in each iteration (roughly) by this factor scaling_factor = 4 per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension)) @@ -64,24 +65,29 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho times = zeros(iterations, length(neighborhood_searches)) for iter in 1:iterations - coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position) + coordinates_ = point_cloud(sizes[iter], search_radius_factor; + seed, perturbation_factor_position, shuffle) + coordinates = convert.(typeof(search_radius_factor), coordinates_) domain_size = maximum(sizes[iter]) + 1 # Normalize domain size to 1 coordinates ./= domain_size # Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively - search_radius = 4.0f0 / domain_size + search_radius = search_radius_factor / domain_size n_particles = size(coordinates, 2) neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches, search_radius, n_particles) for i in eachindex(neighborhood_searches_copy) - neighborhood_search = neighborhood_searches_copy[i] - PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates) + neighborhood_search_ = neighborhood_searches_copy[i] + neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend, + neighborhood_search_) + coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates) + PointNeighbors.initialize!(neighborhood_search, coords, coords) - time = benchmark(neighborhood_search, coordinates; parallelization_backend) + time = benchmark(neighborhood_search, coords; parallelization_backend) times[iter, i] = time time_string = BenchmarkTools.prettytime(time * 1e9) time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles) @@ -170,23 +176,74 @@ include("benchmarks/benchmarks.jl") run_benchmark_gpu(benchmark_n_body, (10, 10), 3) ``` """ -function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...) +function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; + parallelization_backend=PolyesterBackend(), kwargs...) NDIMS = length(n_points_per_dimension) min_corner = 0.0f0 .* n_points_per_dimension max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) - neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, - cell_list = FullGridCellList(; - search_radius = 0.0f0, - min_corner, - max_corner)) - PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)] - - names = ["GridNeighborhoodSearch with FullGridCellList";; - "PrecomputedNeighborhoodSearch"] + cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner) + grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list, + update_strategy = ParallelUpdate()) + transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU + neighborhood_searches = [ + grid_nhs + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, + update_neighborhood_search = grid_nhs, + transpose_backend)#, max_neighbors=128) + ] + + names = [ + "GridNeighborhoodSearch with FullGridCellList";; + "PrecomputedNeighborhoodSearch" + ] run_benchmark(benchmark, n_points_per_dimension, iterations, - neighborhood_searches; names, kwargs...) + neighborhood_searches; names, parallelization_backend, kwargs...) +end + +""" + run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...) + +Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a +`FullGridCellList`. This is the neighborhood search implementation that is used +in TrixiParticles.jl when performance is important. +Use this function to benchmark and profile TrixiParticles.jl kernels. + +# Arguments +- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref), + [`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref), + [`benchmark_wcsph_fp32`](@ref) and [`benchmark_tlsph`](@ref). +- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number + of points. For example, use `(100, 100)` for a 2D benchmark or + `(10, 10, 10)` for a 3D benchmark. +- `iterations`: Number of refinement iterations + +# Keywords +See [`run_benchmark`](@ref) for a list of available keywords. + +# Examples +```julia +include("benchmarks/benchmarks.jl") + +run_benchmark_full_grid(benchmark_n_body, (10, 10), 3) +``` +""" +function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; + parallelization_backend=PolyesterBackend(), kwargs...) + NDIMS = length(n_points_per_dimension) + + min_corner = 0.0f0 .* n_points_per_dimension + max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) + cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner) + grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list, + update_strategy = ParallelUpdate()) + neighborhood_searches = [grid_nhs] + + names = ["GridNeighborhoodSearch with FullGridCellList";;] + + run_benchmark(benchmark, n_points_per_dimension, iterations, + neighborhood_searches; names, parallelization_backend, kwargs...) end """ diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 2eac4304..13d95c5a 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -1,4 +1,5 @@ using PointNeighbors +using PointNeighbors.Adapt using TrixiParticles using BenchmarkTools @@ -43,47 +44,30 @@ This method is used to simulate an incompressible fluid. """ function benchmark_wcsph(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates)) - density = 1000.0 - particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3 - fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing) - - sound_speed = 10.0 - state_equation = StateEquationCole(; sound_speed, reference_density = density, - exponent = 1) - - viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0) - density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1) + # System initialization has to happen on the CPU + coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates) - __benchmark_wcsph_inner(neighborhood_search, fluid, state_equation, - viscosity, density_diffusion, parallelization_backend) -end - -""" - benchmark_wcsph_fp32(neighborhood_search, coordinates; - parallelization_backend = default_backend(coordinates)) - -Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers. -""" -function benchmark_wcsph_fp32(neighborhood_search, coordinates_; - parallelization_backend = default_backend(coordinates_)) - coordinates = convert(Matrix{Float32}, coordinates_) - density = 1000.0f0 + search_radius = PointNeighbors.search_radius(neighborhood_search) + ELTYPE = typeof(search_radius) + density = convert(ELTYPE, 1000.0) particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3 - fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing) + fluid = InitialCondition(; coordinates = coordinates_cpu, density, + mass = convert(ELTYPE, 0.1) * particle_spacing, + particle_spacing) - sound_speed = 10.0f0 + # Make sure that the computed forces are not all zero + for i in eachindex(fluid.density) + fluid.density[i] += rand(eltype(fluid.density)) + end + + sound_speed = convert(ELTYPE, 10.0) state_equation = StateEquationCole(; sound_speed, reference_density = density, exponent = 1) - viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0) - density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0) - - __benchmark_wcsph_inner(neighborhood_search, fluid, state_equation, - viscosity, density_diffusion, parallelization_backend) -end + viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02), + beta = convert(ELTYPE, 0.0)) + density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1)) -function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation, - viscosity, density_diffusion, parallelization_backend) # Compact support == 2 * smoothing length for these kernels smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2 if ndims(neighborhood_search) == 1 @@ -92,24 +76,22 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}() end - fluid_system = WeaklyCompressibleSPHSystem(initial_condition; + fluid_system = WeaklyCompressibleSPHSystem(fluid; density_calculator = ContinuityDensity(), state_equation, smoothing_kernel, smoothing_length, viscosity, density_diffusion) - system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system) + system = Adapt.adapt(parallelization_backend, fluid_system) # Remove unnecessary data structures that are only used for initialization - neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) semi = DummySemidiscretization(nhs, parallelization_backend, true) - v = PointNeighbors.Adapt.adapt(parallelization_backend, - vcat(initial_condition.velocity, - initial_condition.density')) - u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates) + v = Adapt.adapt(parallelization_backend, + vcat(fluid.velocity, fluid.density')) + u = Adapt.adapt(parallelization_backend, fluid.coordinates) dv = zero(v) # Initialize the system @@ -129,8 +111,18 @@ This method is used to simulate an elastic structure. """ function benchmark_tlsph(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates)) - material = (density = 1000.0, E = 1.4e6, nu = 0.4) - solid = InitialCondition(; coordinates, density = material.density, mass = 0.1) + # System initialization has to happen on the CPU + coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates) + + search_radius = PointNeighbors.search_radius(neighborhood_search) + ELTYPE = typeof(search_radius) + material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6), + nu = convert(ELTYPE, 0.4)) + + # The `particle_spacing` is only required for setting the type of the initial condition + solid = InitialCondition(; coordinates = coordinates_cpu, + density = material.density, mass = convert(ELTYPE, 0.1), + particle_spacing = search_radius) # Compact support == 2 * smoothing length for these kernels smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2 @@ -141,18 +133,24 @@ function benchmark_tlsph(neighborhood_search, coordinates; smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}() end + penalty_force = PenaltyForceGanzenmueller(alpha = convert(ELTYPE, 0.1)) solid_system = TotalLagrangianSPHSystem(solid; smoothing_kernel, smoothing_length, young_modulus = material.E, - poisson_ratio = material.nu) - semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true) + poisson_ratio = material.nu, penalty_force) + system_ = Adapt.adapt(parallelization_backend, solid_system) - v = copy(solid.velocity) - u = copy(solid.coordinates) + # Remove unnecessary data structures that are only used for initialization + nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + system = TrixiParticles.@set system_.self_interaction_nhs = nhs + + semi = DummySemidiscretization(nhs, parallelization_backend, true) + + v = Adapt.adapt(parallelization_backend, copy(solid.velocity)) + u = Adapt.adapt(parallelization_backend, copy(solid.coordinates)) dv = zero(v) # Initialize the system - TrixiParticles.initialize!(solid_system, semi) + TrixiParticles.initialize!(system, semi) - return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, - $solid_system, $solid_system, $semi) + return @belapsed TrixiParticles.interact_structure_structure!($dv, $v, $system, $semi) end diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index 58b38687..faa8cefc 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -21,7 +21,8 @@ include("nhs_precomputed.jl") include("gpu.jl") export foreach_point_neighbor, foreach_point_neighbor_unsafe, - foreach_neighbor, foreach_neighbor_unsafe + foreach_neighbor, foreach_neighbor_unsafe, + mapreduce_neighbor, mapreduce_neighbor_unsafe export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch export DictionaryCellList, FullGridCellList, SpatialHashingCellList export DynamicVectorOfVectors diff --git a/src/gpu.jl b/src/gpu.jl index 1de3dbb4..31728da3 100644 --- a/src/gpu.jl +++ b/src/gpu.jl @@ -31,7 +31,8 @@ function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch) return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius, periodic_box, neighborhood_search, - nhs.sort_neighbor_lists) + nhs.sort_neighbor_lists, + nhs.update_neighborhood_search_padding) end function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS} diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index 528c40de..1dc2f05d 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -262,13 +262,17 @@ See [`foreach_neighbor_unsafe`](@ref) for a version that skips all bounds checks point, point_coords, search_radius) end +@inline foreach_neighbor_op(::Any, ::Any) = nothing + # This is a function barrier to prevent the `@inbounds` in `foreach_neighbor` # from propagating into the neighbor loop, which is not safe. @inline function foreach_neighbor(f, neighbor_coords, neighborhood_search::AbstractNeighborhoodSearch, point, point_coords, search_radius) - foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, - point, point_coords, search_radius) + mapreduce_neighbor_inner(f, foreach_neighbor_op, + neighbor_coords, neighborhood_search, + point, point_coords, search_radius, nothing) + return nothing end """ @@ -312,8 +316,67 @@ Note that all these bounds checks are safe to skip if point_coords = @inbounds extract_svector(system_coords, Val(ndims(neighborhood_search)), point) - @inbounds foreach_neighbor_inner(f, neighbor_coords, neighborhood_search, - point, point_coords, search_radius) + @inbounds mapreduce_neighbor_inner(f, foreach_neighbor_op, + neighbor_coords, neighborhood_search, + point, point_coords, search_radius, nothing) + return nothing +end + +""" + mapreduce_neighbor(f, op, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + init, search_radius = search_radius(neighborhood_search)) + +Apply `f(i, j, pos_diff, d)` to every neighbor of `point` and reduce the results with +the binary operator `op`, analogous to `mapreduce(f, op, collection)`. + +The keyword argument `init` is required and is returned if `point` has no neighbors. +This method performs the same bounds checks as [`foreach_neighbor`](@ref). +See [`mapreduce_neighbor_unsafe`](@ref) for a version that skips all bounds checks. +""" +@propagate_inbounds function mapreduce_neighbor(f, op, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point; init, + search_radius = search_radius(neighborhood_search)) + point_coords = extract_svector(system_coords, Val(ndims(neighborhood_search)), point) + + mapreduce_neighbor(f, op, neighbor_coords, neighborhood_search, + point, point_coords, search_radius, init) +end + +# This is a function barrier to prevent the `@inbounds` in `mapreduce_neighbor` +# from propagating into the neighbor loop, which is not safe. +@inline function mapreduce_neighbor(f, op, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point, point_coords, search_radius, init) + mapreduce_neighbor_inner(f, op, neighbor_coords, neighborhood_search, + point, point_coords, search_radius, init) +end + +""" + mapreduce_neighbor_unsafe(f, op, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, point; + init, search_radius = search_radius(neighborhood_search)) + +Like [`mapreduce_neighbor`](@ref), but skips **all** bounds checks. + +See [`foreach_neighbor_unsafe`](@ref) for details on which bounds checks are skipped +and when it is safe to skip them. + +!!! warning + Use this only when `point` is known to be in bounds of `system_coords` + and when `neighborhood_search` is known to be initialized correctly for + `system_coords` and `neighbor_coords`. +""" +@inline function mapreduce_neighbor_unsafe(f, op, system_coords, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point; init, + search_radius = search_radius(neighborhood_search)) + point_coords = @inbounds extract_svector(system_coords, Val(ndims(neighborhood_search)), + point) + + @inbounds mapreduce_neighbor_inner(f, op, neighbor_coords, neighborhood_search, + point, point_coords, search_radius, init) end # This is the generic function that is called for `TrivialNeighborhoodSearch`. @@ -321,11 +384,14 @@ end # performance. `PrecomputedNeighborhoodSearch` can skip the distance check altogether. # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, - neighborhood_search::AbstractNeighborhoodSearch, - point, point_coords, search_radius) +@propagate_inbounds function mapreduce_neighbor_inner(f, op, neighbor_coords, + neighborhood_search::AbstractNeighborhoodSearch, + point, point_coords, + search_radius, init) (; periodic_box) = neighborhood_search + reduced = init + for neighbor in eachneighbor(point_coords, neighborhood_search) neighbor_point_coords = extract_svector(neighbor_coords, Val(ndims(neighborhood_search)), neighbor) @@ -341,11 +407,14 @@ end if distance2 <= search_radius^2 distance = sqrt(distance2) - # Inline to avoid loss of performance - # compared to not using `foreach_point_neighbor`. - @inline f(point, neighbor, pos_diff, distance) + # Inline to avoid loss of performance compared to not using this function + # and unrolling everything. + value = @inline f(point, neighbor, pos_diff, distance) + reduced = @inline op(reduced, value) end end + + return reduced end @inline function compute_periodic_distance(pos_diff, distance2, search_radius, diff --git a/src/nhs_grid.jl b/src/nhs_grid.jl index 8e3c1ff2..6bc5253b 100644 --- a/src/nhs_grid.jl +++ b/src/nhs_grid.jl @@ -510,11 +510,13 @@ end # than looping over `eachneighbor`. # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, - neighborhood_search::GridNeighborhoodSearch, - point, point_coords, search_radius) +@propagate_inbounds function mapreduce_neighbor_inner(f, op, neighbor_coords, + neighborhood_search::GridNeighborhoodSearch, + point, point_coords, + search_radius, init) (; cell_list, periodic_box) = neighborhood_search cell = cell_coords(point_coords, neighborhood_search) + reduced = init for neighbor_cell_ in neighboring_cells(cell, neighborhood_search) neighbor_cell = Tuple(neighbor_cell_) @@ -545,8 +547,6 @@ end search_radius, periodic_box) if distance2 <= search_radius^2 - distance = sqrt(distance2) - # If this cell has a collision, check if this point belongs to this cell # (only with `SpatialHashingCellList`). if cell_collision && @@ -555,12 +555,17 @@ end continue end - # Inline to avoid loss of performance - # compared to not using `foreach_point_neighbor`. - @inline f(point, neighbor, pos_diff, distance) + distance = sqrt(distance2) + + # Inline to avoid loss of performance compared to not using this function + # and unrolling everything. + value = @inline f(point, neighbor, pos_diff, distance) + reduced = @inline op(reduced, value) end end end + + return reduced end @inline function neighboring_cells(cell, neighborhood_search) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index c7ddceca..093f20f8 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -2,6 +2,7 @@ PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, periodic_box = nothing, update_strategy = nothing, update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(), + update_neighborhood_search_padding = 0, backend = DynamicVectorOfVectors{Int32}, transpose_backend = false, max_neighbors = max_neighbors(NDIMS)) @@ -40,6 +41,17 @@ to strip the internal neighborhood search, which is not needed anymore. If the precomputed NHS is to be used on the GPU, make sure to either freeze it after initialization and never update it again, or pass a GPU-compatible neighborhood search here. +- `update_neighborhood_search_padding = 0`: Relative padding used for the fixed + search radius of the default internal + [`GridNeighborhoodSearch`](@ref) that computes the neighbor + lists. For example, `update_neighborhood_search_padding = 0.05` + creates the internal search with a 5% larger radius than + `search_radius`. This allows [`update!`](@ref) to rebuild the + precomputed neighbor lists with a larger `search_radius`, so the + lists can be updated less frequently when points move only + slowly. The search radius of the internal update neighborhood + search must be at least as large as the `search_radius` passed + to `update!` to build the precomputed neighbor lists. - `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the neighbor lists. Can be - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. @@ -66,31 +78,37 @@ to strip the internal neighborhood search, which is not needed anymore. """ struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <: AbstractNeighborhoodSearch - neighbor_lists :: NL - search_radius :: ELTYPE - periodic_box :: PB - neighborhood_search :: NHS - sort_neighbor_lists :: Bool + neighbor_lists :: NL + search_radius :: ELTYPE + periodic_box :: PB + neighborhood_search :: NHS + sort_neighbor_lists :: Bool + update_neighborhood_search_padding :: Float64 function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search, - sort_neighbor_lists) where {NDIMS} + sort_neighbor_lists, + update_neighborhood_search_padding) where {NDIMS} return new{NDIMS, typeof(neighbor_lists), typeof(search_radius), typeof(periodic_box), typeof(update_neighborhood_search)}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search, - sort_neighbor_lists) + sort_neighbor_lists, + update_neighborhood_search_padding) end end function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, periodic_box = nothing, update_strategy = nothing, + update_neighborhood_search_padding = 0, update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; - search_radius, + search_radius = search_radius * + (1 + + update_neighborhood_search_padding), n_points, periodic_box, update_strategy), @@ -102,7 +120,8 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search, - sort_neighbor_lists) + sort_neighbor_lists, + update_neighborhood_search_padding) end # Default values for maximum neighbor count @@ -135,8 +154,11 @@ function initialize!(search::PrecomputedNeighborhoodSearch, # Initialize grid NHS initialize!(neighborhood_search, x, y; parallelization_backend) + check_update_neighborhood_search_radius(search, search.search_radius) + initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, search.sort_neighbor_lists) + search.search_radius, parallelization_backend, + search.sort_neighbor_lists) return search end @@ -144,7 +166,7 @@ end function update!(search::PrecomputedNeighborhoodSearch, x::AbstractMatrix, y::AbstractMatrix; points_moving = (true, true), parallelization_backend = default_backend(x), - eachindex_y = axes(y, 2)) + eachindex_y = axes(y, 2), search_radius = search.search_radius) (; neighborhood_search, neighbor_lists) = search if eachindex_y != axes(y, 2) @@ -156,15 +178,30 @@ function update!(search::PrecomputedNeighborhoodSearch, # Skip update if both point sets are static if any(points_moving) - initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, + check_update_neighborhood_search_radius(search, search_radius) + + initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, search_radius, parallelization_backend, search.sort_neighbor_lists) end return search end +function check_update_neighborhood_search_radius(search::PrecomputedNeighborhoodSearch, + neighbor_list_search_radius) + update_search_radius = search_radius(search.neighborhood_search) + + if update_search_radius < neighbor_list_search_radius + throw(ArgumentError("the search radius of the internal update neighborhood search " * + "($update_search_radius) must be at least as large as the " * + "search radius used to build the precomputed neighbor lists " * + "($neighbor_list_search_radius)")) + end +end + function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, sort_neighbor_lists) + search_radius, parallelization_backend, + sort_neighbor_lists) # Initialize neighbor lists empty!(neighbor_lists) resize!(neighbor_lists, size(x, 2)) @@ -174,14 +211,16 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, # Fill neighbor lists foreach_point_neighbor(x, y, neighborhood_search; - parallelization_backend) do point, neighbor, _, _ - push!(neighbor_lists[point], neighbor) + parallelization_backend) do point, neighbor, _, distance + if distance <= search_radius + push!(neighbor_lists[point], neighbor) + end end end function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, - neighborhood_search, x, y, parallelization_backend, - sort_neighbor_lists) + neighborhood_search, x, y, search_radius, + parallelization_backend, sort_neighbor_lists) resize!(neighbor_lists, size(x, 2)) # `Base.empty!.(neighbor_lists)`, but for all backends @@ -191,8 +230,10 @@ function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, # Fill neighbor lists foreach_point_neighbor(x, y, neighborhood_search; - parallelization_backend) do point, neighbor, _, _ - pushat!(neighbor_lists, point, neighbor) + parallelization_backend) do point, neighbor, _, distance + if distance <= search_radius + pushat!(neighbor_lists, point, neighbor) + end end if sort_neighbor_lists @@ -202,14 +243,17 @@ end # Note that calling this function with `@inbounds` is not safe. # See the comments in `foreach_neighbor_unsafe`. -@propagate_inbounds function foreach_neighbor_inner(f, neighbor_coords, - neighborhood_search::PrecomputedNeighborhoodSearch, - point, point_coords, search_radius) +@propagate_inbounds function mapreduce_neighbor_inner(f, op, neighbor_coords, + neighborhood_search::PrecomputedNeighborhoodSearch, + point, point_coords, + search_radius, init) (; periodic_box, neighbor_lists) = neighborhood_search # Making the following `@inbounds` is not safe because the neighbor list # might not contain `point` if the NHS was not initialized correctly. neighbors = neighbor_lists[point] + reduced = init + for neighbor_ in eachindex(neighbors) neighbor = @inbounds neighbors[neighbor_] @@ -229,28 +273,35 @@ end distance = sqrt(distance2) - # Inline to avoid loss of performance - # compared to not using `foreach_point_neighbor`. - @inline f(point, neighbor, pos_diff, distance) + # Inline to avoid loss of performance compared to not using this function + # and unrolling everything. + value = @inline f(point, neighbor, pos_diff, distance) + reduced = @inline op(reduced, value) end + + return reduced end function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, search_radius, n_points; eachpoint = 1:n_points) update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search, - search_radius, n_points; - eachpoint) + search_radius * (1 + + nhs.update_neighborhood_search_padding), + n_points; eachpoint) # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback. # This should never be used because this backend doesn't require a `max_neighbors`. max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) transpose_backend = transposed_backend(nhs.neighbor_lists) + update_neighborhood_search_padding = nhs.update_neighborhood_search_padding return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, update_neighborhood_search, backend = typeof(nhs.neighbor_lists), transpose_backend, - max_neighbors = max_neighbors_) + max_neighbors = max_neighbors_, + sort_neighbor_lists = nhs.sort_neighbor_lists, + update_neighborhood_search_padding) end @inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch) @@ -261,5 +312,6 @@ end search.search_radius, search.periodic_box, nothing, - search.sort_neighbor_lists) + search.sort_neighbor_lists, + search.update_neighborhood_search_padding) end diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 3820e11d..5a15f99f 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -113,7 +113,7 @@ foreach_point_neighbor(coords, coords, nhs, points = axes(coords, 2)) do point, neighbor, pos_diff, distance - append!(neighbors[point], neighbor) + push!(neighbors[point], neighbor) end # All of these tests are designed to yield the same neighbor lists. @@ -142,14 +142,14 @@ "($(seed == 1 ? "`initialize!`" : "`update!`"))" @testset verbose=true "$(name(cloud_size, seed)))" for cloud_size in cloud_sizes, seed in seeds - coords = point_cloud(cloud_size, seed = seed) + search_radius = 2.5 + coords = point_cloud(cloud_size, search_radius, seed = seed) NDIMS = length(cloud_size) n_points = size(coords, 2) - search_radius = 2.5 # Use different coordinates for `initialize!` and then `update!` with the # correct coordinates to make sure that `update!` is working as well. - coords_initialize = point_cloud(cloud_size, seed = 1) + coords_initialize = point_cloud(cloud_size, search_radius, seed = 1) # Compute expected neighbor lists by brute-force looping over all points # as potential neighbors (`TrivialNeighborhoodSearch`). @@ -163,7 +163,7 @@ neighbor, pos_diff, distance - append!(neighbors_expected[point], neighbor) + push!(neighbors_expected[point], neighbor) end # Expand the domain by `search_radius`, as we need the neighboring cells of @@ -275,7 +275,7 @@ neighbor, pos_diff, distance - append!(neighbors[point], neighbor) + push!(neighbors[point], neighbor) end @test sort.(neighbors) == neighbors_expected @@ -287,11 +287,20 @@ for point in axes(coords, 2) foreach_neighbor(coords, coords, nhs, point) do point, neighbor, pos_diff, distance - append!(neighbors_manual[point], neighbor) + push!(neighbors_manual[point], neighbor) end end @test sort.(neighbors_manual) == neighbors_expected + + # Test that `foreach_neighbor` does not allocate. + point = first(axes(coords, 2)) + function allocations_empty_foreach_neighbor(coords, nhs, point) + @allocated(foreach_neighbor((point, neighbor, pos_diff, + distance) -> nothing, + coords, coords, nhs, point)) + end + @test allocations_empty_foreach_neighbor(coords, nhs, point) == 0 end # Repeat with foreach_point_neighbor_unsafe @@ -302,7 +311,7 @@ neighbor, pos_diff, distance - append!(neighbors_unsafe[point], neighbor) + push!(neighbors_unsafe[point], neighbor) end @test sort.(neighbors_unsafe) == neighbors_expected @@ -315,12 +324,69 @@ foreach_neighbor_unsafe(coords, coords, nhs, point) do point, neighbor, pos_diff, distance - append!(neighbors_manual_unsafe[point], neighbor) + push!(neighbors_manual_unsafe[point], neighbor) end end @test sort.(neighbors_manual_unsafe) == neighbors_expected end + + @testset "`mapreduce_neighbor`" begin + neighbor_sums = map(axes(coords, 2)) do point + mapreduce_neighbor(+, coords, coords, nhs, point; + init = 0) do point_, neighbor, + pos_diff, distance + point_ == point || error("incorrect point index") + neighbor + end + end + + @test neighbor_sums == sum.(neighbors_expected) + + # Test that `mapreduce_neighbor` does not allocate. + point = first(axes(coords, 2)) + function allocations_count_neighbors(coords, nhs, point) + @allocated(mapreduce_neighbor((point, neighbor, pos_diff, + distance) -> neighbor, + +, coords, coords, nhs, point; + init = 0)) + end + @test allocations_count_neighbors(coords, nhs, point) == 0 + + @test_throws UndefKeywordError mapreduce_neighbor(+, coords, coords, + nhs, + first(axes(coords, + 2))) do point_, + neighbor, + pos_diff, + distance + neighbor + end + end + + @testset "`mapreduce_neighbor_unsafe`" begin + neighbor_sums = map(axes(coords, 2)) do point + mapreduce_neighbor_unsafe(+, coords, coords, nhs, point; + init = 0) do point_, neighbor, + pos_diff, distance + point_ == point || error("incorrect point index") + neighbor + end + end + + @test neighbor_sums == sum.(neighbors_expected) + + @test_throws UndefKeywordError mapreduce_neighbor_unsafe(+, + coords, coords, + nhs, + first(axes(coords, + 2))) do point_, + neighbor, + pos_diff, + distance + neighbor + end + end end end end diff --git a/test/nhs_precomputed.jl b/test/nhs_precomputed.jl index fe084352..e4c3801f 100644 --- a/test/nhs_precomputed.jl +++ b/test/nhs_precomputed.jl @@ -35,4 +35,43 @@ pointer_ = pointer(neighbor_lists.backend.parent) @test unsafe_load(pointer_, 1) == 101 @test unsafe_load(pointer_, 2) == 201 + + @testset "`update_neighborhood_search_padding`" begin + search_radius = 1.0 + update_neighborhood_search_padding = 0.05 + nhs = PrecomputedNeighborhoodSearch{2}(; search_radius, + update_neighborhood_search_padding, + n_points = 2) + + @test PointNeighbors.search_radius(nhs.neighborhood_search) == 1.05 + + coordinates = [0.0 1.0 + 0.0 0.0] + + initialize!(nhs, coordinates, coordinates) + @test update!(nhs, coordinates, coordinates; search_radius = 1.05) === nhs + + error = ArgumentError("the search radius of the internal update neighborhood " * + "search (1.05) must be at least as large as the search " * + "radius used to build the precomputed neighbor lists (1.06)") + @test_throws error update!(nhs, coordinates, coordinates; search_radius = 1.06) + end + + @testset "`Adapt.adapt_structure` preserves all fields" begin + nhs = PrecomputedNeighborhoodSearch{3}(; search_radius = 1.0f0, + n_points = 2, + update_neighborhood_search_padding = 0.05) + frozen_nhs = PointNeighbors.freeze_neighborhood_search(nhs) + + adapted_nhs = @trixi_test_nowarn PointNeighbors.Adapt.adapt_structure(Array, + frozen_nhs) + + @test adapted_nhs.neighbor_lists == frozen_nhs.neighbor_lists + @test adapted_nhs.search_radius == frozen_nhs.search_radius + @test adapted_nhs.periodic_box == frozen_nhs.periodic_box + @test adapted_nhs.neighborhood_search == frozen_nhs.neighborhood_search + @test adapted_nhs.sort_neighbor_lists == frozen_nhs.sort_neighbor_lists + @test adapted_nhs.update_neighborhood_search_padding == + frozen_nhs.update_neighborhood_search_padding + end end diff --git a/test/point_cloud.jl b/test/point_cloud.jl index ba99b2e1..64c41bec 100644 --- a/test/point_cloud.jl +++ b/test/point_cloud.jl @@ -1,8 +1,9 @@ using Random # Generate a rectangular point cloud, optionally with a perturbation in the point positions -function point_cloud(n_points_per_dimension; - seed = 1, perturbation_factor_position = 1.0) +function point_cloud(n_points_per_dimension, search_radius; + seed = 1, perturbation_factor_position = 1.0, + sort = true, shuffle = false) # Fixed seed to ensure reproducibility Random.seed!(seed) @@ -10,8 +11,15 @@ function point_cloud(n_points_per_dimension; coordinates = Array{Float64}(undef, n_dims, prod(n_points_per_dimension)) cartesian_indices = CartesianIndices(n_points_per_dimension) + # Extra data structures for the sorting code below + cell_coords = Vector{SVector{n_dims, Int}}(undef, size(coordinates, 2)) + cell_size = ntuple(dim -> search_radius, n_dims) + for i in axes(coordinates, 2) - coordinates[:, i] .= Tuple(cartesian_indices[i]) + point_coords = SVector(Tuple(cartesian_indices[i])) + coordinates[:, i] .= point_coords + cell_coords[i] = PointNeighbors.cell_coords(point_coords, nothing, nothing, + cell_size) .+ 1 end # A standard deviation of 0.05 in the particle coordinates @@ -21,6 +29,23 @@ function point_cloud(n_points_per_dimension; # The benchmark results are also consistent with the timer output of the simulation. perturb!(coordinates, perturbation_factor_position * 0.05) + # Sort by linear cell index + if sort + if shuffle + throw(ArgumentError("cannot sort and shuffle at the same time")) + end + + # Sort by Z index (with `using Morton`) + # permutation = sortperm(cell_coords, by = c -> cartesian2morton(c)) + + permutation = sortperm(cell_coords) + coordinates .= coordinates[:, permutation] + elseif shuffle + # Sort randomly + permutation = shuffle(axes(coordinates, 2)) + coordinates .= coordinates[:, permutation] + end + return coordinates end