diff --git a/.sublime-project b/.sublime-project
index 45091a2..725e39e 100644
--- a/.sublime-project
+++ b/.sublime-project
@@ -13,5 +13,8 @@
     "format_on_save": true,
     "style": "file"
   },
+  "python-black": {
+    "format_on_save": "on",
+  },
  }
 }
diff --git a/mkn.yaml b/mkn.yaml
index fc54392..e64a5cb 100644
--- a/mkn.yaml
+++ b/mkn.yaml
@@ -11,21 +11,18 @@ profile:
 - name: rocm
   parent: headers
   arg: -DMKN_GPU_ROCM=1
-  # main: tst/any/managed.cpp
   test: tst/any/(\w).cpp
         tst/hip/(\w).cpp
 
 - name: cuda
   parent: headers
   arg: -DMKN_GPU_CUDA=1
-  # main: tst/any/managed.cpp
   test: tst/any/(\w).cpp
         tst/cuda/(\w).cpp
 
 # if you have no GPU but want to test your code
 - name: cpu
   parent: headers
-  # main: tst/any/class1.cpp
   arg: -DMKN_GPU_CPU=1
   test: tst/any/(\w).cpp
         tst/cpu/(\w).cpp
diff --git a/res/poc/jit/eg.py b/res/poc/jit/eg.py
new file mode 100644
index 0000000..bf2a6e3
--- /dev/null
+++ b/res/poc/jit/eg.py
@@ -0,0 +1,34 @@
+import numpy as np
+from numba import config
+
+config.CUDA_ENABLE_PYNVJITLINK = 1
+
+import warnings
+from numba import cuda
+from numba.core.errors import NumbaPerformanceWarning
+
+warnings.simplefilter("ignore", category=NumbaPerformanceWarning)
+
+import poc_pyb
+
+N = 32  # or warpsize
+
+
+@cuda.jit
+def vadd(i, a, b, c):
+    c[i] = a[i] + b[i]
+
+
+@cuda.jit
+def vector_add_gpu(a, b, c):
+    vadd(cuda.threadIdx.x, a, b, c)
+
+
+s = poc_pyb.FunctionSupport()
+s.print()
+a, b, c = s.A(), s.B(), s.C()
+print(c)
+vector_add_gpu[1, N](a, b, c)
+print(c)
+a += 11
+s.print()
diff --git a/res/poc/jit/mkn.yaml b/res/poc/jit/mkn.yaml
new file mode 100644
index 0000000..b2cc340
--- /dev/null
+++ b/res/poc/jit/mkn.yaml
@@ -0,0 +1,15 @@
+#! build -p pyb -qx ../../../res/mkn/nvcc.yaml -a "-g"
+
+name: poc
+dep: mkn.gpu
+
+profile:
+- name: pyb
+  inc: .
+  dep: lang.pybind11 mkn.gpu
+  mod: lang.python3 python3.module
+  src: pybind.cpp
+  install: .
+  env: |
+    MKN_LIB_LINK_LIB=1
+    MKN_PYTHON_LIB_EMBED=1
diff --git a/res/poc/jit/pybind.cpp b/res/poc/jit/pybind.cpp
new file mode 100644
index 0000000..5d7d231
--- /dev/null
+++ b/res/poc/jit/pybind.cpp
@@ -0,0 +1,71 @@
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <vector>  // NOTE(review): header names were lost in paste; only numpy.h/pybind11.h/iostream are certain — confirm the rest
+
+#define PRINT(x) std::cout << __FILE__ << " " << __LINE__ << " " << x << std::endl;
+
+#include "mkn/gpu.hpp"
+#include "mkn/gpu/tuple.hpp"
+
+void init(float* a, float* b) {
+  mkn::gpu::DLauncher()([a = a, b = b] __device__() {
+    a[threadIdx.x] = threadIdx.x + 1;
+    b[threadIdx.x] = threadIdx.x + 2;
+  });
+}
+
+namespace py = pybind11;
+
+struct FunctionSupport {
+  FunctionSupport() {
+    mkn::gpu::alloc_managed(a, 32);
+    mkn::gpu::alloc_managed(b, 32);
+    mkn::gpu::alloc_managed(c, 32);
+    print();
+    init(a, b);
+    print();
+  }
+  ~FunctionSupport() {
+    mkn::gpu::destroy(a);
+    mkn::gpu::destroy(b);
+    mkn::gpu::destroy(c);
+  }
+
+  void print() {
+    PRINT(a[0]);
+    PRINT(b[0]);
+    PRINT(c[0]);
+  }
+
+  py::array_t<float> make(auto p) {
+    return {{32}, {sizeof(float)}, p, py::capsule(p, [](void* f) { /* noop */ })};
+  }
+
+  py::array_t<float> A() { return make(a); }
+  py::array_t<float> B() { return make(b); }
+  py::array_t<float> C() { return make(c); }
+
+  float* a = nullptr;
+  float* b = nullptr;
+  float* c = nullptr;
+};
+
+PYBIND11_MODULE(poc_pyb, m) {
+  py::class_<FunctionSupport>(m, "FunctionSupport")
+      .def(py::init<>())
+      .def("print", &FunctionSupport::print)
+      .def_readwrite("a", &FunctionSupport::a)
+      .def("A", &FunctionSupport::A)
+      .def_readwrite("b", &FunctionSupport::b)
+      .def("B", &FunctionSupport::B)
+      .def_readwrite("c", &FunctionSupport::c)
+      .def("C", &FunctionSupport::C);
+
+  using Span_t = mkn::gpu::Span<float>;
+  py::class_<Span_t>(m, "Span_s")
+      .def("__getitem__", [](Span_t& self, unsigned index) { return self[index]; });
+}
diff --git a/res/poc/jit/requirements.txt b/res/poc/jit/requirements.txt
new file mode 100644
index 0000000..2536826
--- /dev/null
+++ b/res/poc/jit/requirements.txt
@@ -0,0 +1,2 @@
+numpy
+numba-cuda
diff --git a/tst/any/async_streaming.cpp b/tst/any/async_streaming.cpp
index c4f4979..70fd285 100644
--- a/tst/any/async_streaming.cpp
+++ b/tst/any/async_streaming.cpp
@@ -7,6 +7,8 @@
 
 #include <chrono>
 
+#include <thread>  // NOTE(review): both header names on this hunk were lost in paste — confirm against the repo
+
 using namespace mkn::gpu;
 using namespace std::chrono_literals;
 