diff --git a/src/PointNeighbors.jl b/src/PointNeighbors.jl index 7a59bd99..702c19e4 100644 --- a/src/PointNeighbors.jl +++ b/src/PointNeighbors.jl @@ -23,6 +23,7 @@ include("gpu.jl") export foreach_point_neighbor, foreach_neighbor export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch export DictionaryCellList, FullGridCellList, SpatialHashingCellList +export DynamicVectorOfVectors export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate, ParallelIncrementalUpdate export requires_update diff --git a/src/cell_lists/cell_lists.jl b/src/cell_lists/cell_lists.jl index 7512329e..346af284 100644 --- a/src/cell_lists/cell_lists.jl +++ b/src/cell_lists/cell_lists.jl @@ -11,16 +11,20 @@ abstract type AbstractCellList end end function construct_backend(::Type{Vector{Vector{T}}}, - max_outer_length, - max_inner_length) where {T} + max_outer_length, max_inner_length; + transpose_backend = false) where {T} + if transpose_backend + error("transpose backend is only supported for DynamicVectorOfVectors backend") + end + return [T[] for _ in 1:max_outer_length] end function construct_backend(::Type{DynamicVectorOfVectors{T}}, - max_outer_length, - max_inner_length) where {T} - cells = DynamicVectorOfVectors{T}(max_outer_length = max_outer_length, - max_inner_length = max_inner_length) + max_outer_length, max_inner_length; + transpose_backend = false) where {T} + cells = DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length, + transpose_backend) resize!(cells, max_outer_length) return cells @@ -30,10 +34,11 @@ end # `DynamicVectorOfVectors{T}`, but a type `DynamicVectorOfVectors{T1, T2, T3, T4}`. # While `A{T} <: A{T1, T2}`, this doesn't hold for the types. # `Type{A{T}} <: Type{A{T1, T2}}` is NOT true. -function construct_backend(::Type{DynamicVectorOfVectors{T1, T2, T3, T4}}, max_outer_length, - max_inner_length) where {T1, T2, T3, T4} +function construct_backend(::Type{DynamicVectorOfVectors{T1, T2, T3, T4}}, + max_outer_length, max_inner_length; + transpose_backend = false) where {T1, T2, T3, T4} return construct_backend(DynamicVectorOfVectors{T1}, max_outer_length, - max_inner_length) + max_inner_length; transpose_backend) end function max_points_per_cell(cells::DynamicVectorOfVectors) diff --git a/src/nhs_grid.jl b/src/nhs_grid.jl index 7b0d5929..3f36a62e 100644 --- a/src/nhs_grid.jl +++ b/src/nhs_grid.jl @@ -30,6 +30,8 @@ since not sorting makes our implementation a lot faster (although less paralleli # Keywords - `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together with [`copy_neighborhood_search`](@ref). + Note that the type of `search_radius` determines the type used + for the distance computations. - `n_points = 0`: Total number of points. The default of `0` is useful together with [`copy_neighborhood_search`](@ref). - `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index d6632234..187ff0c5 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -3,7 +3,8 @@ periodic_box = nothing, update_strategy = nothing, update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(), backend = DynamicVectorOfVectors{Int32}, - max_neighbors = max_neighbors(NDIMS), + transpose_backend = false, + max_neighbors = max_neighbors(NDIMS)) sort_neighbor_lists = true) Neighborhood search with precomputed neighbor lists. A list of all neighbors is computed @@ -23,6 +24,8 @@ to strip the internal neighborhood search, which is not needed anymore. # Keywords - `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together with [`copy_neighborhood_search`](@ref). + Note that the type of `search_radius` determines the type used + for the distance computations. - `n_points = 0`: Total number of points. The default of `0` is useful together with [`copy_neighborhood_search`](@ref). - `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a @@ -42,6 +45,18 @@ to strip the internal neighborhood search, which is not needed anymore. - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits and GPU-compatible. +- `transpose_backend = false`: Whether to transpose the backend data structure storing the + neighbor lists. This is only supported for the + `DynamicVectorOfVectors` backend. + By default, the neighbors of each point are stored contiguously + in memory. This layout optimizes cache hits when looping + over all neighbors of a point on CPUs. + On GPUs, however, storing all first neighbors of all points + contiguously in memory, then all second neighbors, etc., + (`transpose_backend = true`) allows for coalesced + memory accesses when all threads process the n-th neighbor + of their respective point in parallel. + This can lead to a speedup of ~3x in many cases. - `max_neighbors`: Maximum number of neighbors per particle. This will be used to allocate the `DynamicVectorOfVectors`. It is not used with other backends. The default is 64 in 2D and 324 in 3D. @@ -80,9 +95,10 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = periodic_box, update_strategy), backend = DynamicVectorOfVectors{Int32}, + transpose_backend = false, max_neighbors = max_neighbors(NDIMS), sort_neighbor_lists = true) where {NDIMS} - neighbor_lists = construct_backend(backend, n_points, max_neighbors) + neighbor_lists = construct_backend(backend, n_points, max_neighbors; transpose_backend) PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search, @@ -225,10 +241,12 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback. # This should never be used because this backend doesn't require a `max_neighbors`. max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) + transpose_backend = transposed_backend(nhs.neighbor_lists) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, update_neighborhood_search, backend = typeof(nhs.neighbor_lists), + transpose_backend, max_neighbors = max_neighbors_) end diff --git a/src/nhs_trivial.jl b/src/nhs_trivial.jl index 814ac03a..c97f0809 100644 --- a/src/nhs_trivial.jl +++ b/src/nhs_trivial.jl @@ -10,6 +10,8 @@ Trivial neighborhood search that simply loops over all points. # Keywords - `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together with [`copy_neighborhood_search`](@ref). + Note that the type of `search_radius` determines the type used + for the distance computations. - `eachpoint = 1:0`: Iterator for all point indices. Usually just `1:n_points`. The default of `1:0` is useful together with [`copy_neighborhood_search`](@ref). diff --git a/src/vector_of_vectors.jl b/src/vector_of_vectors.jl index 038537d7..c05cddbd 100644 --- a/src/vector_of_vectors.jl +++ b/src/vector_of_vectors.jl @@ -13,8 +13,17 @@ struct DynamicVectorOfVectors{T, ARRAY2D, ARRAY1D, L} <: AbstractVector{Array{T, end end -function DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length) where {T} - backend = Array{T, 2}(undef, max_inner_length, max_outer_length) +function DynamicVectorOfVectors{T}(; max_outer_length, max_inner_length, + transpose_backend = false) where {T} + if transpose_backend + # Create a row-major array + backend_ = Array{T, 2}(undef, max_outer_length, max_inner_length) + # Wrap in a `PermutedDimsArray` to obtain the usual column-major access, + # even though the underlying memory layout is row-major. + backend = PermutedDimsArray(backend_, (2, 1)) + else + backend = Array{T, 2}(undef, max_inner_length, max_outer_length) + end length_ = Ref(zero(Int32)) lengths = zeros(Int32, max_outer_length) @@ -203,6 +212,14 @@ function max_inner_length(cells::DynamicVectorOfVectors, fallback) end # Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list. -function max_inner_length(::Any, fallback) +@inline function max_inner_length(::Any, fallback) return fallback end + +@inline function transposed_backend(::DynamicVectorOfVectors{<:Any, <:PermutedDimsArray}) + return true +end + +@inline function transposed_backend(::Any) + return false +end diff --git a/test/nhs_precomputed.jl b/test/nhs_precomputed.jl new file mode 100644 index 00000000..fe084352 --- /dev/null +++ b/test/nhs_precomputed.jl @@ -0,0 +1,38 @@ +@testset verbose=true "PrecomputedNeighborhoodSearch" begin + # Test regular vs transposed backend + nhs = PrecomputedNeighborhoodSearch{2}(transpose_backend = false, n_points = 2) + + # Add test neighbors + neighbor_lists = nhs.neighbor_lists + @test PointNeighbors.transposed_backend(neighbor_lists) == false + neighbor_lists.backend .= 0 + PointNeighbors.pushat!(neighbor_lists, 1, 101) + PointNeighbors.pushat!(neighbor_lists, 1, 102) + PointNeighbors.pushat!(neighbor_lists, 2, 201) + PointNeighbors.pushat!(neighbor_lists, 2, 202) + PointNeighbors.pushat!(neighbor_lists, 2, 203) + + # Check that neighbors are next to each other in memory + pointer_ = pointer(neighbor_lists.backend) + @test unsafe_load(pointer_, 1) == 101 + @test unsafe_load(pointer_, 2) == 102 + + # Transposed backend + nhs = PrecomputedNeighborhoodSearch{2}(transpose_backend = true, n_points = 2) + + # Add test neighbors + neighbor_lists = nhs.neighbor_lists + @test PointNeighbors.transposed_backend(neighbor_lists) == true + neighbor_lists.backend .= 0 + PointNeighbors.pushat!(neighbor_lists, 1, 101) + PointNeighbors.pushat!(neighbor_lists, 1, 102) + PointNeighbors.pushat!(neighbor_lists, 2, 201) + PointNeighbors.pushat!(neighbor_lists, 2, 202) + PointNeighbors.pushat!(neighbor_lists, 2, 203) + + # Check that first neighbors are next to each other in memory + @test neighbor_lists.backend isa PermutedDimsArray + pointer_ = pointer(neighbor_lists.backend.parent) + @test unsafe_load(pointer_, 1) == 101 + @test unsafe_load(pointer_, 2) == 201 +end diff --git a/test/unittest.jl b/test/unittest.jl index bdcedbd4..f0981614 100644 --- a/test/unittest.jl +++ b/test/unittest.jl @@ -4,6 +4,7 @@ include("vector_of_vectors.jl") include("nhs_trivial.jl") include("nhs_grid.jl") + include("nhs_precomputed.jl") include("neighborhood_search.jl") include("cell_lists/full_grid.jl") end;