Skip to content
91 changes: 74 additions & 17 deletions benchmarks/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3,
```
"""
function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches;
search_radius_factor = 3.0,
parallelization_backend = PolyesterBackend(),
names = ["Neighborhood search $i"
for i in 1:length(neighborhood_searches)]',
seed = 1, perturbation_factor_position = 1.0)
seed = 1, perturbation_factor_position = 1.0, shuffle = false)
# Multiply number of points in each iteration (roughly) by this factor
scaling_factor = 4
per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension))
Expand All @@ -64,24 +65,29 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho
times = zeros(iterations, length(neighborhood_searches))

for iter in 1:iterations
coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position)
coordinates_ = point_cloud(sizes[iter], search_radius_factor;
seed, perturbation_factor_position, shuffle)
coordinates = convert.(typeof(search_radius_factor), coordinates_)
domain_size = maximum(sizes[iter]) + 1

# Normalize domain size to 1
coordinates ./= domain_size

# Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively
search_radius = 4.0f0 / domain_size
search_radius = search_radius_factor / domain_size
n_particles = size(coordinates, 2)

neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches,
search_radius, n_particles)

for i in eachindex(neighborhood_searches_copy)
neighborhood_search = neighborhood_searches_copy[i]
PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates)
neighborhood_search_ = neighborhood_searches_copy[i]
neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend,
neighborhood_search_)
coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
PointNeighbors.initialize!(neighborhood_search, coords, coords)

time = benchmark(neighborhood_search, coordinates; parallelization_backend)
time = benchmark(neighborhood_search, coords; parallelization_backend)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles)
Expand Down Expand Up @@ -170,23 +176,74 @@ include("benchmarks/benchmarks.jl")
run_benchmark_gpu(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...)
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
cell_list = FullGridCellList(;
search_radius = 0.0f0,
min_corner,
max_corner))
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)]

names = ["GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"]
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU
neighborhood_searches = [
grid_nhs
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0,
update_neighborhood_search = grid_nhs,
transpose_backend)#, max_neighbors=128)
]

names = [
"GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"
]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, kwargs...)
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...)

Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a
`FullGridCellList`. This is the neighborhood search implementation that is used
in TrixiParticles.jl when performance is important.
Use this function to benchmark and profile TrixiParticles.jl kernels.

# Arguments
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
[`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref),
[`benchmark_wcsph_fp32`](@ref) and [`benchmark_tlsph`](@ref).
- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number
of points. For example, use `(100, 100)` for a 2D benchmark or
`(10, 10, 10)` for a 3D benchmark.
- `iterations`: Number of refinement iterations

# Keywords
See [`run_benchmark`](@ref) for a list of available keywords.

# Examples
```julia
include("benchmarks/benchmarks.jl")

run_benchmark_full_grid(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
neighborhood_searches = [grid_nhs]

names = ["GridNeighborhoodSearch with FullGridCellList";;]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
Expand Down
100 changes: 49 additions & 51 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PointNeighbors
using PointNeighbors.Adapt
using TrixiParticles
using BenchmarkTools

Expand Down Expand Up @@ -43,47 +44,30 @@ This method is used to simulate an incompressible fluid.
"""
function benchmark_wcsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
density = 1000.0
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing)

sound_speed = 10.0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
density = convert(ELTYPE, 1000.0)
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing)
fluid = InitialCondition(; coordinates = coordinates_cpu, density,
mass = convert(ELTYPE, 0.1) * particle_spacing,
particle_spacing)

sound_speed = 10.0f0
# Make sure that the computed forces are not all zero
for i in eachindex(fluid.density)
fluid.density[i] += rand(eltype(fluid.density))
end

sound_speed = convert(ELTYPE, 10.0)
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end
viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02),
beta = convert(ELTYPE, 0.0))
density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1))

function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation,
viscosity, density_diffusion, parallelization_backend)
# Compact support == 2 * smoothing length for these kernels
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
if ndims(neighborhood_search) == 1
Expand All @@ -92,24 +76,22 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

fluid_system = WeaklyCompressibleSPHSystem(initial_condition;
fluid_system = WeaklyCompressibleSPHSystem(fluid;
density_calculator = ContinuityDensity(),
state_equation, smoothing_kernel,
smoothing_length, viscosity,
density_diffusion)

system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
system = Adapt.adapt(parallelization_backend, fluid_system)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(initial_condition.velocity,
initial_condition.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates)
v = Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = Adapt.adapt(parallelization_backend, fluid.coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -129,8 +111,18 @@ This method is used to simulate an elastic structure.
"""
function benchmark_tlsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6),
nu = convert(ELTYPE, 0.4))

# The `particle_spacing` is only required for setting the type of the initial condition
solid = InitialCondition(; coordinates = coordinates_cpu,
density = material.density, mass = convert(ELTYPE, 0.1),
particle_spacing = search_radius)

# Compact support == 2 * smoothing length for these kernels
smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2
Expand All @@ -141,18 +133,24 @@ function benchmark_tlsph(neighborhood_search, coordinates;
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

penalty_force = PenaltyForceGanzenmueller(alpha = convert(ELTYPE, 0.1))
solid_system = TotalLagrangianSPHSystem(solid; smoothing_kernel, smoothing_length,
young_modulus = material.E,
poisson_ratio = material.nu)
semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true)
poisson_ratio = material.nu, penalty_force)
system_ = Adapt.adapt(parallelization_backend, solid_system)

v = copy(solid.velocity)
u = copy(solid.coordinates)
# Remove unnecessary data structures that are only used for initialization
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
system = TrixiParticles.@set system_.self_interaction_nhs = nhs

semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = Adapt.adapt(parallelization_backend, copy(solid.velocity))
u = Adapt.adapt(parallelization_backend, copy(solid.coordinates))
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(solid_system, semi)
TrixiParticles.initialize!(system, semi)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u,
$solid_system, $solid_system, $semi)
return @belapsed TrixiParticles.interact_structure_structure!($dv, $v, $system, $semi)
end
6 changes: 3 additions & 3 deletions test/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,14 @@
"($(seed == 1 ? "`initialize!`" : "`update!`"))"
@testset verbose=true "$(name(cloud_size, seed)))" for cloud_size in cloud_sizes,
seed in seeds
coords = point_cloud(cloud_size, seed = seed)
search_radius = 2.5
coords = point_cloud(cloud_size, search_radius, seed = seed)
NDIMS = length(cloud_size)
n_points = size(coords, 2)
search_radius = 2.5

# Use different coordinates for `initialize!` and then `update!` with the
# correct coordinates to make sure that `update!` is working as well.
coords_initialize = point_cloud(cloud_size, seed = 1)
coords_initialize = point_cloud(cloud_size, search_radius, seed = 1)

# Compute expected neighbor lists by brute-force looping over all points
# as potential neighbors (`TrivialNeighborhoodSearch`).
Expand Down
31 changes: 28 additions & 3 deletions test/point_cloud.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
using Random

# Generate a rectangular point cloud, optionally with a perturbation in the point positions
function point_cloud(n_points_per_dimension;
seed = 1, perturbation_factor_position = 1.0)
function point_cloud(n_points_per_dimension, search_radius;
seed = 1, perturbation_factor_position = 1.0,
sort = true, shuffle = false)
# Fixed seed to ensure reproducibility
Random.seed!(seed)

n_dims = length(n_points_per_dimension)
coordinates = Array{Float64}(undef, n_dims, prod(n_points_per_dimension))
cartesian_indices = CartesianIndices(n_points_per_dimension)

# Extra data structures for the sorting code below
cell_coords = Vector{SVector{n_dims, Int}}(undef, size(coordinates, 2))
cell_size = ntuple(dim -> search_radius, n_dims)

for i in axes(coordinates, 2)
coordinates[:, i] .= Tuple(cartesian_indices[i])
point_coords = SVector(Tuple(cartesian_indices[i]))
coordinates[:, i] .= point_coords
cell_coords[i] = PointNeighbors.cell_coords(point_coords, nothing, nothing,
cell_size) .+ 1
end

# A standard deviation of 0.05 in the particle coordinates
Expand All @@ -21,6 +29,23 @@ function point_cloud(n_points_per_dimension;
# The benchmark results are also consistent with the timer output of the simulation.
perturb!(coordinates, perturbation_factor_position * 0.05)

# Sort by linear cell index
if sort
if shuffle
throw(ArgumentError("cannot sort and shuffle at the same time"))
end

# Sort by Z index (with `using Morton`)
# permutation = sortperm(cell_coords, by = c -> cartesian2morton(c))

permutation = sortperm(cell_coords)
coordinates .= coordinates[:, permutation]
elseif shuffle
# Sort randomly
permutation = shuffle(axes(coordinates, 2))
coordinates .= coordinates[:, permutation]
end

return coordinates
end

Expand Down
Loading