Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions inc/mkn/gpu/any/inc/alloc.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,16 @@ template <typename T0, typename T1, typename Size>
void copy(T0* dst, T1* src, Size const size) {
assert(dst and src);

Pointer src_p{src};
Pointer dst_p{dst};
auto const is_dev = [](auto const& ptr) { return ptr.is_device_ptr() || ptr.is_managed_ptr(); };
auto const is_hst = [](auto const& ptr) { return ptr.is_host_ptr() || ptr.is_managed_ptr(); };

auto to_send = [&]() { return dst_p.is_device_ptr() && src_p.is_host_ptr(); };
auto to_take = [&]() { return dst_p.is_host_ptr() && src_p.is_device_ptr(); };
auto on_host = [&]() { return dst_p.is_host_ptr() && src_p.is_host_ptr(); };
auto on_device = [&]() { return dst_p.is_device_ptr() && src_p.is_device_ptr(); };
Pointer const src_p{src};
Pointer const dst_p{dst};

auto const to_send = [&]() { return is_dev(dst_p) && is_hst(src_p); };
auto const to_take = [&]() { return is_hst(dst_p) && is_dev(src_p); };
auto const on_host = [&]() { return is_hst(dst_p) && is_hst(src_p); };
auto const on_device = [&]() { return is_dev(dst_p) && is_dev(src_p); };

if (on_host())
std::copy(src, src + size, dst);
Expand All @@ -130,6 +133,12 @@ void copy(T0* dst, T1* src, Size const size) {
throw std::runtime_error("Unsupported operation (PR welcome)");
}

// Container-level copy: fills dst element-wise from src.
// src must provide at least dst.size() elements.
void copy(auto& dst, auto const& src) {
  auto const n_out = dst.size();
  if (src.size() < n_out) throw std::runtime_error("mkn::gpu::copy src too small!");
  copy(dst.data(), src.data(), n_out);
}
Comment thread
PhilipDeegan marked this conversation as resolved.

template <typename T, std::int32_t align>
auto& reserve(std::vector<T, NoConstructAllocator<T, align>>& v, std::size_t const& s,
bool mem_copy = true) {
Expand Down
230 changes: 4 additions & 226 deletions inc/mkn/gpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,238 +35,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mkn/kul/log.hpp"
#include "mkn/kul/span.hpp"
#include "mkn/kul/tuple.hpp"
#include "mkn/kul/assert.hpp"

#include "mkn/gpu/def.hpp"
#include "mkn/gpu/cpu/def.hpp"
#include "mkn/gpu/cpu/api.hpp"
#include "mkn/gpu/cpu/cls.hpp"

#include <algorithm>
#include <cassert>
#include <cstring>
#include <utility>

#define MKN_GPU_ASSERT(x) (KASSERT((x)))

#if defined(__device__)
#pragma message("__device__ already defined")
#error // check your compiler
#endif

#if defined(__host__)
#pragma message("__host__ already defined")
#error // check your compiler
#endif

#if defined(__global__)
#pragma message("__global__ already defined")
#error // check your compiler
#endif

// we need to exclude these for CPU only operations
#define __shared__
#define __device__
#define __host__
#define __global__
#define __syncthreads(...)

#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
#define MKN_GPU_NS mkn::gpu::cpu
#else
#define MKN_GPU_NS mkn::gpu
#endif // MKN_GPU_FN_PER_NS

#if !defined(MKN_CPU_DO_NOT_DEFINE_DIM3)
#define MKN_CPU_DO_NOT_DEFINE_DIM3 0
#endif

#if !defined(dim3) and !MKN_CPU_DO_NOT_DEFINE_DIM3
// Minimal stand-in for CUDA's dim3 when building the CPU backend:
// up to three extents, each defaulting to 1.
struct dim3 {
  dim3() = default;
  dim3(std::size_t _x) : x{_x} {}
  dim3(std::size_t _x, std::size_t _y) : x{_x}, y{_y} {}
  dim3(std::size_t _x, std::size_t _y, std::size_t _z) : x{_x}, y{_y}, z{_z} {}

  std::size_t x = 1, y = 1, z = 1;
};

// CUDA-builtin lookalikes. Nothing in the visible CPU path ever writes them,
// so they presumably stay at {1,1,1}; launch() below tracks position through
// detail::idx instead — confirm against device.ipp consumers.
dim3 static inline threadIdx, blockIdx;
static constexpr int warpSize = 1;

#endif // MKN_CPU_DO_NOT_DEFINE_DIM3

//

namespace MKN_GPU_NS {
static constexpr int warp_size = warpSize;

inline void setLimitMallocHeapSize(std::size_t const& /*bytes*/) { /* no-op on CPU */ }

inline void setDevice(std::size_t const& /*dev*/) { /* no-op: single CPU "device" */ }

// No-op stand-in for a GPU stream; carries only an integer id.
struct Stream {
  Stream() = default;
  ~Stream() = default;

  auto& operator()() { return stream; }
  void sync() {}  // nothing asynchronous to wait for on CPU

  std::size_t stream = 0;
};

// CPU stand-in for a stream event: attached work runs synchronously,
// so the event is always "finished".
struct StreamEvent {
  StreamEvent(Stream&) {}
  ~StreamEvent() {}

  // Runs fn (if any) immediately and returns *this for chaining.
  // BUG FIX: the default argument is an empty std::function, and invoking an
  // empty std::function throws std::bad_function_call — guard before calling.
  auto& operator()(std::function<void()> fn = {}) {
    if (fn) fn();
    return *this;
  }

  bool finished() const { return fin; }

  Stream stream;
  bool fin = true;  // work completes inline, so always done
  std::function<void()> _fn;
};

// CPU backend pointer classifier: any non-null pointer counts as host
// memory — there is no separate device or managed address space here.
template <typename T>
struct Pointer {
  Pointer(T* ptr) : t{ptr} {}

  bool is_unregistered_ptr() const { return !t; }
  bool is_host_ptr() const { return true; }
  bool is_device_ptr() const { return false; }
  bool is_managed_ptr() const { return false; }

  T* t;
};

template <typename Size>
void alloc(void*& p, Size size) {
KLOG(TRC) << "CPU alloced: " << size;
MKN_GPU_ASSERT(p = std::malloc(size));
}

// Allocates size * sizeof(T) bytes for an array of T (no construction).
// Allocation is hoisted out of the assert macro so it survives builds
// where assertions compile away.
template <typename T, typename Size>
void alloc(T*& p, Size size) {
  KLOG(TRC) << "CPU alloced: " << size * sizeof(T);
  p = reinterpret_cast<T*>(std::malloc(size * sizeof(T)));
  MKN_GPU_ASSERT(p);
}

// Host allocation — identical to alloc() on the CPU backend.
// Allocation hoisted out of the assert macro so it survives NDEBUG-style builds.
template <typename T, typename Size>
void alloc_host(T*& p, Size size) {
  KLOG(TRC) << "CPU alloced: " << size * sizeof(T);
  p = reinterpret_cast<T*>(std::malloc(size * sizeof(T)));
  MKN_GPU_ASSERT(p);
}

// "Managed" allocation — plain host malloc on the CPU backend.
// Allocation hoisted out of the assert macro so it survives NDEBUG-style builds.
template <typename T, typename Size>
void alloc_managed(T*& p, Size size) {
  KLOG(TRC) << "CPU alloced: " << size * sizeof(T);
  p = reinterpret_cast<T*>(std::malloc(size * sizeof(T)));
  MKN_GPU_ASSERT(p);
}

// Frees memory obtained from the alloc* functions above.
void inline destroy(void* p) {
  KLOG(TRC);
  std::free(p);
}

// Frees p. NOTE(review): p is taken by reference but is not nulled after the
// free — presumably to match the GPU backends' signatures; confirm.
template <typename T>
void destroy(T*& p) {
  KLOG(TRC);
  std::free(p);
}

// Frees host memory — identical to destroy() on the CPU backend.
template <typename T>
void destroy_host(T*& p) {
  KLOG(TRC);
  std::free(p);
}

// "Device-to-device" copy: plain memcpy on the CPU backend.
// The memcpy is hoisted out of the assert macro so the copy still happens if
// assertions are compiled out; memcpy returns dst, so the assert checks non-null.
template <typename T, typename Size>
void copy_on_device(T* dst, T const* src, Size size = 1) {
  KLOG(TRC);
  auto* const ret = std::memcpy(dst, src, size * sizeof(T));
  MKN_GPU_ASSERT(ret);
}

// Byte-wise host-to-"device" send: copies `size` bytes from t into p.
// memcpy hoisted out of the assert macro so the copy survives assert-stripped builds.
template <typename Size>
void send(void* p, void* t, Size size = 1) {
  KLOG(TRC);
  auto* const ret = std::memcpy(p, t, size);
  MKN_GPU_ASSERT(ret);
}

// Typed host-to-"device" send: copies `size` T's from t into p.
// memcpy hoisted out of the assert macro so the copy survives assert-stripped builds.
template <typename T, typename Size>
void send(T* p, T const* t, Size size = 1) {
  KLOG(TRC);
  auto* const ret = std::memcpy(p, t, size * sizeof(T));
  MKN_GPU_ASSERT(ret);
}
// "Async" send degrades to a synchronous copy on CPU; the stream is unused.
template <typename T, typename Size>
void send_async(T* dst, T const* src, auto& /*stream*/, Size size = 1) {
  KLOG(TRC);
  send(dst, src, size);
}

// "Device"-to-host take: copies `size` T's from p back into t.
// memcpy hoisted out of the assert macro so the copy survives assert-stripped builds.
template <typename T, typename Size>
void take(T const* p, T* t, Size size = 1) {
  KLOG(TRC);
  auto* const ret = std::memcpy(t, p, size * sizeof(T));
  MKN_GPU_ASSERT(ret);
}

// "Async" take degrades to a synchronous copy on CPU; the stream is unused.
template <typename T, typename Size>
void take_async(T const* src, T* dst, auto& /*stream*/, Size size = 1) {
  KLOG(TRC);
  take(src, dst, size);
}

inline void sync() { /* no-op: CPU work is already synchronous */ }

#include "mkn/gpu/any/inc/alloc.ipp"
#include "mkn/gpu/any/inc/device.ipp"

namespace detail {
// Flat per-thread "kernel index", advanced by launch() between invocations
// and reset to 0 when a launch completes.
static thread_local std::size_t idx = 0;
}

// Emulates a kernel launch on the CPU: invokes f sequentially once per
// emulated GPU thread, i.e. (grid volume) * (block volume) times in total.
// detail::idx is stepped between invocations (consumed elsewhere — presumably
// as the flat thread index) and reset to 0 once the "launch" completes.
// _sync is unused here: every CPU launch is synchronous.
template <bool _sync = true, typename F, typename... Args>
void launch(F f, dim3 g, dim3 b, std::size_t /*ds*/, std::size_t /*stream*/, Args&&... args) {
  std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);  // total emulated threads
  KLOG(TRC) << N;
  std::apply(
      [&](auto&&... params) {
        for (std::size_t i = 0; i < N; ++i) {
          f(params...);
          detail::idx++;
        }
      },
      devmem_replace(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()));

  detail::idx = 0;
}

// Carries grid/block dimensions plus launch bookkeeping and forwards to launch().
struct Launcher {
  Launcher(dim3 _g, dim3 _b) : g{_g}, b{_b} {}
  Launcher(std::size_t w, std::size_t h, std::size_t tpx, std::size_t tpy)
      : Launcher{dim3(w / tpx, h / tpy), dim3(tpx, tpy)} {}
  Launcher(std::size_t x, std::size_t y, std::size_t z, std::size_t tpx, std::size_t tpy,
           std::size_t tpz)
      : Launcher{dim3(x / tpx, y / tpy, z / tpz), dim3(tpx, tpy, tpz)} {}

  // FIX: f and args were declared as forwarding references but passed on as
  // lvalues, silently copying rvalue / move-only arguments — forward them.
  template <typename F, typename... Args>
  void operator()(F&& f, Args&&... args) {
    launch(std::forward<F>(f), g, b, ds, s, std::forward<Args>(args)...);
  }

  std::size_t ds = 0 /*dynamicShared*/;
  dim3 g /*gridDim*/, b /*blockDim*/;
  std::size_t s = 0;
};

// One-dimensional launcher sized so 1024-wide blocks cover `s` elements.
struct GLauncher : public Launcher {
  GLauncher(std::size_t s, [[maybe_unused]] size_t dev = 0) : Launcher{dim3{}, dim3{}}, count{s} {
    b.x = 1024;
    g.x = s / b.x + (s % b.x == 0 ? 0 : 1);  // round the grid up to cover every element
  }

  std::size_t count;
};

template <typename Container, typename T>
void fill(Container& c, size_t const size, T const val) {
Expand All @@ -283,7 +61,7 @@ void fill_warp_size(T* const t, std::size_t const size, T const val) {
std::fill(t, t + size, val);
}

// Prints backend info; there is no real device to query on CPU.
inline void prinfo(std::size_t /*dev*/ = 0) { KOUT(NON) << "Pseudo GPU in use"; }

} // namespace MKN_GPU_NS

Expand Down
2 changes: 2 additions & 0 deletions inc/mkn/gpu/cpu/api.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#ifndef _MKN_GPU_CPU_API_HPP_
#define _MKN_GPU_CPU_API_HPP_

#include "mkn/gpu/cpu/def.hpp"

#endif /*_MKN_GPU_CPU_API_HPP_*/
Loading
Loading