From cd44f3facc528510368f4db1470f0ec6e5961011 Mon Sep 17 00:00:00 2001 From: Jan Kieseler Date: Wed, 6 Aug 2025 10:32:44 +0200 Subject: [PATCH 1/5] refactor trainData type handling --- src/djcdata/interface/trainData.h | 18 ++-- src/djcdata/src/bind/bindings.cpp | 19 ++-- src/djcdata/src/trainData.cpp | 145 ++++++++++++++++-------------- 3 files changed, 95 insertions(+), 87 deletions(-) diff --git a/src/djcdata/interface/trainData.h b/src/djcdata/interface/trainData.h index ca8ff65..3ef9827 100644 --- a/src/djcdata/interface/trainData.h +++ b/src/djcdata/interface/trainData.h @@ -17,6 +17,7 @@ #include "IO.h" #include +#include #include "pybind11/pybind11.h" #include "pybind11/numpy.h" @@ -31,6 +32,12 @@ namespace djc{ class typeContainer{ public: + typeContainer() = default; + typeContainer(const typeContainer& rhs); + typeContainer& operator=(const typeContainer& rhs); + typeContainer(typeContainer&& rhs) = default; + typeContainer& operator=(typeContainer&& rhs) = default; + void push_back(simpleArrayBase& a); void move_back(simpleArrayBase& a); @@ -39,7 +46,6 @@ class typeContainer{ return !(*this==rhs); } - simpleArrayBase& at(size_t idx); const simpleArrayBase& at(size_t idx)const; @@ -52,8 +58,7 @@ class typeContainer{ void clear(); - size_t size()const{return sorting_.size();} - + size_t size()const{return arrays_.size();} void writeToFile(FILE *&) const; inline void readFromFile(FILE *&f){ @@ -67,12 +72,7 @@ class typeContainer{ private: void readFromFile_priv(FILE *& f, bool justmetadata); - std::vector farrs_; - std::vector iarrs_; - - enum typesorting{isfloat,isint}; - std::vector > sorting_; - + std::vector > arrays_; }; diff --git a/src/djcdata/src/bind/bindings.cpp b/src/djcdata/src/bind/bindings.cpp index 7c24f5b..1abd570 100644 --- a/src/djcdata/src/bind/bindings.cpp +++ b/src/djcdata/src/bind/bindings.cpp @@ -9,11 +9,16 @@ namespace py = pybind11; +template +void makeBaseArr(M& m){ + using namespace djc; + py::class_(m, "simpleArrayBase"); +} template void makeArr(M& m, std::string name){ using namespace djc; - py::class_ >(m, name.data()) + py::class_, simpleArrayBase >(m, name.data()) .def(py::init()) .def(py::self == py::self) @@ -103,14 +108,9 @@ void makeTD(M & m, std::string name){ // } // )) - .def("storeFeatureArray", static_cast(&trainData::storeFeatureArray)) - .def("storeFeatureArray", static_cast(&trainData::storeFeatureArray)) - - .def("storeTruthArray", static_cast(&trainData::storeTruthArray)) - .def("storeTruthArray", static_cast(&trainData::storeTruthArray)) - - .def("storeWeightArray", static_cast(&trainData::storeWeightArray)) - .def("storeWeightArray", static_cast(&trainData::storeWeightArray)) + .def("storeFeatureArray", &trainData::storeFeatureArray) + .def("storeTruthArray", &trainData::storeTruthArray) + .def("storeWeightArray", &trainData::storeWeightArray) .def("nFeatureArrays", &trainData::nFeatureArrays) @@ -194,6 +194,7 @@ void makeTDG(M & m, std::string name){ //warp it up PYBIND11_MODULE(compiled, m) { + makeBaseArr(m); makeArr(m,"simpleArrayF"); makeArr(m,"simpleArrayI"); diff --git a/src/djcdata/src/trainData.cpp b/src/djcdata/src/trainData.cpp index 1d7f3e8..0533e6d 100644 --- a/src/djcdata/src/trainData.cpp +++ b/src/djcdata/src/trainData.cpp @@ -5,75 +5,88 @@ namespace py=pybind11; namespace djc{ -/* - * - std::vector farrs_; - std::vector iarrs_; +// typeContainer ---------------------------------------------------- + +typeContainer::typeContainer(const typeContainer& rhs){ + for(const auto& p: rhs.arrays_){ + if(p->dtype() == simpleArrayBase::float32) + arrays_.emplace_back(new simpleArray_float32( + dynamic_cast(*p))); + else if(p->dtype() == simpleArrayBase::int32) + arrays_.emplace_back(new simpleArray_int32( + dynamic_cast(*p))); + } +} - enum typesorting{isfloat,isint}; - std::vector > sorting_; - */ +typeContainer& typeContainer::operator=(const typeContainer& rhs){ + if(this!=&rhs){ + arrays_.clear(); + for(const auto& p: rhs.arrays_){ + if(p->dtype() == simpleArrayBase::float32) + arrays_.emplace_back(new simpleArray_float32( + dynamic_cast(*p))); + else if(p->dtype() == simpleArrayBase::int32) + arrays_.emplace_back(new simpleArray_int32( + dynamic_cast(*p))); + } + } + return *this; +} void typeContainer::push_back(simpleArrayBase& a){ - if(a.dtype() == simpleArrayBase::float32){ - farrs_.push_back(dynamic_cast(a)); - sorting_.push_back({isfloat,farrs_.size()-1}); - } - else {//if(a.dtype() == simpleArrayBase::int32){ - iarrs_.push_back(dynamic_cast(a)); - sorting_.push_back({isint,iarrs_.size()-1}); - } + if(a.dtype() == simpleArrayBase::float32) + arrays_.emplace_back(new simpleArray_float32( + dynamic_cast(a))); + else + arrays_.emplace_back(new simpleArray_int32( + dynamic_cast(a))); } + void typeContainer::move_back(simpleArrayBase& a){ - if(a.dtype() == simpleArrayBase::float32){ - farrs_.push_back(std::move(dynamic_cast(a))); - sorting_.push_back({isfloat,farrs_.size()-1}); - } - else {//if(a.dtype() == simpleArrayBase::int32){ - iarrs_.push_back(std::move(dynamic_cast(a))); - sorting_.push_back({isint,iarrs_.size()-1}); - } + if(a.dtype() == simpleArrayBase::float32) + arrays_.emplace_back(new simpleArray_float32( + std::move(dynamic_cast(a)))); + else + arrays_.emplace_back(new simpleArray_int32( + std::move(dynamic_cast(a)))); } + bool typeContainer::operator==(const typeContainer& rhs)const{ if(size() != rhs.size()) return false; - if(farrs_.size() != rhs.farrs_.size()) - return false; - - if(sorting_ != rhs.sorting_) - return false; - - for(size_t i=0;i(a) != + dynamic_cast(b)) + return false; + } + else if(a.dtype() == simpleArrayBase::int32){ + if(dynamic_cast(a) != + dynamic_cast(b)) + return false; + } + else return false; } return true; } -simpleArrayBase& typeContainer::at(size_t idx){ - if(idx>=sorting_.size()) - throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(sorting_.size())); - auto s = sorting_.at(idx); - if(s.first == isfloat) - return farrs_.at(s.second); - else //if(s.first == isint) - return iarrs_.at(s.second); +simpleArrayBase& typeContainer::at(size_t idx){ + if(idx>=arrays_.size()) + throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(arrays_.size())); + return *arrays_.at(idx); } + const simpleArrayBase& typeContainer::at(size_t idx)const{ - if(idx>=sorting_.size()) - throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(sorting_.size())); - auto s = sorting_.at(idx); - if(s.first == isfloat) - return farrs_.at(s.second); - else //if(s.first == isint) - return iarrs_.at(s.second); + if(idx>=arrays_.size()) + throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(arrays_.size())); + return *arrays_.at(idx); } - simpleArray_float32& typeContainer::at_asfloat32(size_t idx){ if(at(idx).dtype() != simpleArrayBase::float32) throw std::runtime_error("typeContainer::at_asfloat32: is not float32"); @@ -98,23 +111,15 @@ const simpleArray_int32& typeContainer::at_asint32(size_t idx)const{ return dynamic_cast(at(idx)); } - void typeContainer::clear(){ - farrs_.clear(); - iarrs_.clear(); - sorting_.clear(); + arrays_.clear(); } void typeContainer::writeToFile(FILE *& ofile) const{ size_t isize=size(); io::writeToFile(&isize,ofile); - for(const auto& i: sorting_){ - if(i.first == isfloat){ - farrs_.at(i.second).addToFileP(ofile); - } - else {// if(i.first == isint){ - iarrs_.at(i.second).addToFileP(ofile); - } + for(const auto& a : arrays_){ + a->addToFileP(ofile); } } @@ -123,18 +128,20 @@ void typeContainer::readFromFile_priv(FILE *& ifile, bool justmetadata){ size_t isize = 0; io::readFromFile(&isize,ifile); for(size_t i=0;i(); + arr->readFromFileP(ifile,justmetadata); + arrays_.emplace_back(std::move(arr)); } - else{ //if(dtype==simpleArrayBase::int32){ - simpleArray_int32 iarr; - iarr.readFromFileP(ifile,justmetadata); - move_back(iarr); + else if(dtype == simpleArrayBase::int32){ + auto arr = std::make_unique(); + arr->readFromFileP(ifile,justmetadata); + arrays_.emplace_back(std::move(arr)); } + else + throw std::runtime_error("typeContainer::readFromFile_priv: unsupported dtype"); } } From 2066d76c4297dbdaedf0f3cea4f1d5f1b7667d64 Mon Sep 17 00:00:00 2001 From: Jan Kieseler Date: Wed, 6 Aug 2025 10:49:37 +0200 Subject: [PATCH 2/5] Use installed pybind11 and expose base array bind --- CMakeLists.txt | 13 ++++++++----- setup.py | 3 ++- src/djcdata/src/bind/bindings.cpp | 6 +++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ae3851..158b98d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.4) project(djcdata) include(CMakePrintHelpers) @@ -10,10 +10,13 @@ include_directories(${SOURCE_DIR}/../interface) file(GLOB_RECURSE SOURCES "${SOURCE_DIR}/*.cpp" "${SOURCE_DIR}/*.c") -add_subdirectory(lib/pybind11) -pybind11_add_module(compiled ${SOURCES} "${SOURCE_DIR}/bind/bindings.cpp") +execute_process( + COMMAND ${PYTHON_EXECUTABLE} -m pybind11 --cmakedir + OUTPUT_VARIABLE pybind11_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE) +find_package(pybind11 CONFIG REQUIRED PATHS ${pybind11_DIR}) +pybind11_add_module(compiled ${SOURCES}) -#shared lib lib -include_directories(lib/pybind11/include) +# shared lib lib # add python etc to be added, also copying includes etc # add_library(djcdata SHARED ${SOURCES}) diff --git a/setup.py b/setup.py index f0360e0..91bad76 100644 --- a/setup.py +++ b/setup.py @@ -88,6 +88,7 @@ def build_extension(self, ext): ext_modules=[CMakeExtension('djcdata/compiled')], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, - install_requires=['numpy'], + install_requires=['numpy', 'pybind11'], + setup_requires=['pybind11'], scripts=bins ) diff --git a/src/djcdata/src/bind/bindings.cpp b/src/djcdata/src/bind/bindings.cpp index 1abd570..fc35dcc 100644 --- a/src/djcdata/src/bind/bindings.cpp +++ b/src/djcdata/src/bind/bindings.cpp @@ -108,9 +108,9 @@ void makeTD(M & m, std::string name){ // } // )) - .def("storeFeatureArray", &trainData::storeFeatureArray) - .def("storeTruthArray", &trainData::storeTruthArray) - .def("storeWeightArray", &trainData::storeWeightArray) + .def("storeFeatureArray", static_cast(&trainData::storeFeatureArray)) + .def("storeTruthArray", static_cast(&trainData::storeTruthArray)) + .def("storeWeightArray", static_cast(&trainData::storeWeightArray)) .def("nFeatureArrays", &trainData::nFeatureArrays) From 0ff0e03f5cdc5d2be5d7955065c6c24b41acc464 Mon Sep 17 00:00:00 2001 From: Jan Kieseler Date: Wed, 6 Aug 2025 11:03:25 +0200 Subject: [PATCH 3/5] Add pyproject specifying build deps --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0cba80a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=61", "wheel", "pybind11"] +build-backend = "setuptools.build_meta" From ea07952811e25c4534ec52ae30c2a5fad8fa819d Mon Sep 17 00:00:00 2001 From: Jan Kieseler Date: Wed, 6 Aug 2025 11:33:35 +0200 Subject: [PATCH 4/5] Run tests with pytest and fix paths --- .github/workflows/unittests.yml | 10 ++---- test/TestCompatibility.py | 54 +++++++++++++++------------- test/TestDJCDataLoader.py | 4 ++- test/{runtests.py => test_runall.py} | 11 ++++-- 4 files changed, 43 insertions(+), 36 deletions(-) rename test/{runtests.py => test_runall.py} (72%) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 3aa0eeb..04cfae4 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -27,12 +27,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install torch - - - name: Build and install the package - run: python setup.py install + pip install torch pytest + pip install -e . - name: Run unit tests - run: | - cd test - python runtests.py + run: pytest diff --git a/test/TestCompatibility.py b/test/TestCompatibility.py index 52a90b8..aeb2ac4 100644 --- a/test/TestCompatibility.py +++ b/test/TestCompatibility.py @@ -5,42 +5,46 @@ from djcdata import TrainData, SimpleArray import numpy as np import unittest +import os class TestCompatibility(unittest.TestCase): - + def test_SimpleArrayRead(self): print('TestCompatibility SimpleArray') a = SimpleArray() - a.readFromFile("simpleArray_previous.djcsa") - - arr = np.load("np_arr.npy") - #FIXME: this array was actually wrong + basedir = os.path.dirname(__file__) + a.readFromFile(os.path.join(basedir, "simpleArray_previous.djcsa")) + + arr = np.load(os.path.join(basedir, "np_arr.npy")) + # FIXME: this array was actually wrong arr = arr[:100] - rs = np.load("np_rs.npy") - - b = SimpleArray(arr,rs) - - self.assertEqual(a,b) - + rs = np.load(os.path.join(basedir, "np_rs.npy")) + + b = SimpleArray(arr, rs) + + self.assertEqual(a, b) + def test_TrainDataRead(self): print('TestCompatibility TrainData') td = TrainData() - td.readFromFile('trainData_previous.djctd') - + basedir = os.path.dirname(__file__) + td.readFromFile(os.path.join(basedir, 'trainData_previous.djctd')) + self.assertEqual(td.nFeatureArrays(), 1) - - arr = np.load("np_arr.npy") - #FIXME: this array was actually wrong + + arr = np.load(os.path.join(basedir, "np_arr.npy")) + # FIXME: this array was actually wrong arr = arr[:100] - rs = np.load("np_rs.npy") - - b = SimpleArray(arr,rs) - + rs = np.load(os.path.join(basedir, "np_rs.npy")) + + b = SimpleArray(arr, rs) + a = td.transferFeatureListToNumpy(False) - a, rs = a[0],a[1] - - a = SimpleArray(a,np.array(rs,dtype='int64')) - - self.assertEqual(a,b) + a, rs = a[0], a[1] + + a = SimpleArray(a, np.array(rs, dtype='int64')) + + self.assertEqual(a, b) + \ No newline at end of file diff --git a/test/TestDJCDataLoader.py b/test/TestDJCDataLoader.py index 5c39edd..e77ad1b 100644 --- a/test/TestDJCDataLoader.py +++ b/test/TestDJCDataLoader.py @@ -1,9 +1,11 @@ import unittest import shutil import torch +import os +import sys # Adjust the import path to include the directory containing TestTrainDataGenerator.py -# This assumes that the current script is in the same directory +sys.path.append(os.path.dirname(__file__)) from TestTrainDataGenerator import ( RaggedTester, TempFileList, diff --git a/test/runtests.py b/test/test_runall.py similarity index 72% rename from test/runtests.py rename to test/test_runall.py index 40c1bc3..2919286 100644 --- a/test/runtests.py +++ b/test/test_runall.py @@ -1,13 +1,18 @@ +import os +import sys +sys.path.append(os.path.dirname(__file__)) + from TestSimpleArray import TestSimpleArray from TestTrainData import TestTrainData from TestCompatibility import TestCompatibility from TestTrainDataGenerator import TestTrainDataGenerator from TestDJCDataLoader import TestDJCDataLoader -#from TestCFunctions import TestCFunctions +# from TestCFunctions import TestCFunctions from multiprocessing import freeze_support - import unittest + + if __name__ == '__main__': freeze_support() - unittest.main() \ No newline at end of file + unittest.main() From fa8e6676d1396b552dd22bc5664e60c039e75d5d Mon Sep 17 00:00:00 2001 From: Jan Kieseler Date: Wed, 6 Aug 2025 11:59:43 +0200 Subject: [PATCH 5/5] Remove vendored pybind11 tests --- .github/workflows/unittests.yml | 2 -- .gitmodules | 4 ---- MANIFEST.in | 3 +-- lib/pybind11 | 1 - pyproject.toml | 3 +++ 5 files changed, 4 insertions(+), 9 deletions(-) delete mode 100644 .gitmodules delete mode 160000 lib/pybind11 diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 04cfae4..28307a7 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -12,8 +12,6 @@ jobs: steps: - uses: actions/checkout@v3 - with: - submodules: recursive - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index f7047fb..0000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "lib/pybind11"] - path = lib/pybind11 - url = https://github.com/pybind/pybind11 - branch = stable diff --git a/MANIFEST.in b/MANIFEST.in index fa75d13..86b8b90 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ include CMakeLists.txt -recursive-include lib * recursive-include src/djcdata/interface * -recursive-include src/djcdata/src * \ No newline at end of file +recursive-include src/djcdata/src * diff --git a/lib/pybind11 b/lib/pybind11 deleted file mode 160000 index 914c06f..0000000 --- a/lib/pybind11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 914c06fb252b6cc3727d0eedab6736e88a3fcb01 diff --git a/pyproject.toml b/pyproject.toml index 0cba80a..c0ce77b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ [build-system] requires = ["setuptools>=61", "wheel", "pybind11"] build-backend = "setuptools.build_meta" + +[tool.pytest.ini_options] +testpaths = ["test"]