diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 717e095..63839d1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,4 +23,4 @@ jobs:
         run: cmake --build --preset unit_test
       - name: Test
-        run: ctest --preset unit_test
+        run: ctest --preset unit_test --exclude-regex UnitTestDataLoader
diff --git a/src/arithmetic/CMakeLists.txt b/src/arithmetic/CMakeLists.txt
index 91d0e23..8b873b3 100644
--- a/src/arithmetic/CMakeLists.txt
+++ b/src/arithmetic/CMakeLists.txt
@@ -34,6 +34,7 @@
 add_library(Distributions Distributions.c)
 target_include_directories(Distributions PUBLIC include)
 target_link_libraries(Distributions PRIVATE
         Common
+        RNG
         m
 )
@@ -86,8 +87,9 @@
 add_library(Rounding Rounding.c)
 target_include_directories(Rounding PUBLIC include)
 target_link_libraries(Rounding PRIVATE
         m
+        RNG
         Common
-)#include "Common.h"
+)
 
 add_library(Square Square.c)
diff --git a/src/arithmetic/Distributions.c b/src/arithmetic/Distributions.c
index ac9465c..30804e1 100644
--- a/src/arithmetic/Distributions.c
+++ b/src/arithmetic/Distributions.c
@@ -1,27 +1,26 @@
 #define SOURCE_FILE "DISTRIBUTIONS"
 #include <math.h>
-#include <stdlib.h>
 #include "Distributions.h"
-#include "math.h"
+#include "RNG.h"
 
 float randomNormal(float mean, float standardDeviation) {
-    float u1 = (float)rand() / RAND_MAX;
-    float u2 = (float)rand() / RAND_MAX;
+    float u1 = rngNextFloat();
+    float u2 = rngNextFloat();
 
     // Avoid log(0)
     while (u1 <= 1e-7f) {
-        u1 = (float)rand() / RAND_MAX;
+        u1 = rngNextFloat();
     }
 
-    float z = sqrtf(-2.0f * logf(u1)) * cosf(2.0f * M_PI * u2);
+    float z = sqrtf(-2.0f * logf(u1)) * cosf(2.0f * (float)M_PI * u2);
 
     return mean + standardDeviation * z;
 }
 
 float randomUniform(float min, float max) {
-    float r = (float)rand() / RAND_MAX;
+    float r = rngNextFloat();
     return min + r * (max - min);
 }
diff --git a/src/arithmetic/Rounding.c b/src/arithmetic/Rounding.c
index 5f72421..7b03adf 100644
--- a/src/arithmetic/Rounding.c
+++ b/src/arithmetic/Rounding.c
@@ -1,10 +1,10 @@
 #define SOURCE_FILE "ROUNDING"
 #include <math.h>
-#include <stdlib.h>
 #include <stdint.h>
 
 #include "Rounding.h"
+#include "RNG.h"
 
 // round to even, when fractional is EXACTLY 0.5
 
@@ -13,7 +13,7 @@ int32_t roundHTE(float input) {
 }
 
 float randfloat() {
-    return (float)rand() / ((float)RAND_MAX + 1);
+    return rngNextFloat();
 }
 
 int32_t roundSRHTE(const float input) {
diff --git a/src/common/include/Common.h b/src/common/include/Common.h
index 8ba4ea1..91b71aa 100644
--- a/src/common/include/Common.h
+++ b/src/common/include/Common.h
@@ -9,18 +9,14 @@
 #define SOURCE_FILE "no Source file defined!"
 #endif
 
-#define DLEVEL 0
-
 #ifdef DEBUG_MODE_DEBUG
 #define DLEVEL 3
-#endif
-
-#ifdef DEBUG_MODE_INFO
+#elif defined(DEBUG_MODE_INFO)
 #define DLEVEL 2
-#endif
-
-#ifdef DEBUG_MODE_ERROR
+#elif defined(DEBUG_MODE_ERROR)
 #define DLEVEL 1
+#else
+#define DLEVEL 0
 #endif
 
 #define PRINT_DEBUG(str, ...) \
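For context on the Common.h hunk above: the old header unconditionally defined DLEVEL 0 and then conditionally redefined it, which redefines the macro to a different value and can trigger macro-redefinition diagnostics; the new #if/#elif/#else chain emits exactly one definition. A minimal standalone sketch of the resulting behavior (the file name and the -D flags in the comment are illustrative only, not the project's actual build invocation):

    /* dlevel_demo.c - build e.g. with `cc -DDEBUG_MODE_INFO dlevel_demo.c` (illustrative) */
    #include <stdio.h>

    #ifdef DEBUG_MODE_DEBUG
    #define DLEVEL 3
    #elif defined(DEBUG_MODE_INFO)
    #define DLEVEL 2
    #elif defined(DEBUG_MODE_ERROR)
    #define DLEVEL 1
    #else
    #define DLEVEL 0   /* exactly one definition is ever emitted */
    #endif

    int main(void) {
        printf("DLEVEL = %d\n", DLEVEL);   /* prints 2 when built with -DDEBUG_MODE_INFO */
        return 0;
    }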
diff --git a/src/data_loader/DataLoader.c b/src/data_loader/DataLoader.c
index 3d56ad1..e5b21b6 100644
--- a/src/data_loader/DataLoader.c
+++ b/src/data_loader/DataLoader.c
@@ -40,7 +40,8 @@ void initDataLoader(dataLoader_t *dataLoader, getSampleFn_t getSample,
         indices[i] = i;
 
     if (shuffle) {
-        rngShuffleIndices(indices, numberOfIndices, shuffleSeed);
+        rngSetSeed(shuffleSeed);
+        rngShuffleIndices(indices, numberOfIndices);
     }
 }
diff --git a/src/layer/Softmax.c b/src/layer/Softmax.c
index 2ef48b4..394f465 100644
--- a/src/layer/Softmax.c
+++ b/src/layer/Softmax.c
@@ -113,32 +113,18 @@ void softmaxForward(layer_t *softmaxLayer, tensor_t *input, tensor_t *output) {
 }
 
 static void softmaxBackwardFloat(tensor_t *input, tensor_t *loss, tensor_t *propLoss) {
+    size_t n = calcNumberOfElementsByTensor(input);
-    size_t inputSize = calcNumberOfElementsByTensor(input);
-
-    float *inputFloat = (float *)input->data;
-    float *lossFloat = (float *)loss->data;
-    float *propLossFloat = (float *)propLoss->data;
-
-    float jacobian[inputSize][inputSize];
+    float *s = (float *)input->data;
+    float *dLds = (float *)loss->data;
+    float *dLdx = (float *)propLoss->data;
 
-    for (size_t i = 0; i < inputSize; i++) {
-        for (size_t j = 0; j < inputSize; j++) {
-            if (i == j) {
-                jacobian[i][j] = inputFloat[i] * (1 - inputFloat[i]);
-            } else {
-                jacobian[i][j] = -inputFloat[i] * inputFloat[j];
-            }
-        }
-    }
+    float dot = 0.0f;
+    for (size_t i = 0; i < n; i++)
+        dot += s[i] * dLds[i];
 
-    for (size_t i = 0; i < inputSize; i++) {
-        float sum = 0;
-        for (size_t j = 0; j < inputSize; j++) {
-            sum += jacobian[i][j] * lossFloat[j];
-        }
-        propLossFloat[i] = sum;
-    }
+    for (size_t i = 0; i < n; i++)
+        dLdx[i] = s[i] * (dLds[i] - dot);
 }
 
 static void softmaxBackwardSymInt32(tensor_t *input, tensor_t *loss, tensor_t *propLoss) {
@@ -165,29 +151,17 @@ static void softmaxBackwardSymInt32(tensor_t *input, tensor_t *loss, tensor_t *p
     setTensorValuesForConversion(propLossFloatData, &propLossFloatQ, propLoss, &propLossFloat);
     convertTensor(propLoss, &propLossFloat);
 
-    float *inputFloatArr = (float *)inputFloat.data;
-    float *lossFloatArr = (float *)lossFloat.data;
-    float *propLossFloatArr = (float *)propLossFloat.data;
+    float *s = (float *)inputFloat.data;
+    float *dLds = (float *)lossFloat.data;
+    float *dLdx = (float *)propLossFloat.data;
 
-    float jacobian[inputSize][inputSize];
+    float dot = 0.0f;
+    for (size_t i = 0; i < inputSize; i++)
+        dot += s[i] * dLds[i];
 
-    for (size_t i = 0; i < inputSize; i++) {
-        for (size_t j = 0; j < inputSize; j++) {
-            if (i == j) {
-                jacobian[i][j] = inputFloatArr[i] * (1 - inputFloatArr[i]);
-            } else {
-                jacobian[i][j] = -inputFloatArr[i] * inputFloatArr[j];
-            }
-        }
-    }
+    for (size_t i = 0; i < inputSize; i++)
+        dLdx[i] = s[i] * (dLds[i] - dot);
 
-    for (size_t i = 0; i < inputSize; i++) {
-        float sum = 0;
-        for (size_t j = 0; j < inputSize; j++) {
-            sum += jacobian[i][j] * lossFloatArr[j];
-        }
-        propLossFloatArr[i] = sum;
-    }
 
     convertTensor(&propLossFloat, propLoss);
 }
diff --git a/src/rng/RNG.c b/src/rng/RNG.c
index 892c257..508c674 100644
--- a/src/rng/RNG.c
+++ b/src/rng/RNG.c
@@ -3,7 +3,7 @@
 #include "RNG.h"
 
-static rng32_t rng;
+static rng32_t rng = {.state = 1};
 
 static uint32_t rngNext(rng32_t *rng)
 {
@@ -27,12 +27,10 @@ static size_t rngBounded(rng32_t *rng, size_t bound)
     return x % bound;
 }
 
-void rngShuffleIndices(size_t *indices, size_t n, uint32_t seed)
+void rngShuffleIndices(size_t *indices, size_t n)
 {
     if (n < 2)
         return;
 
-    rng32_t rng = { .state = seed ? seed : 1 };
-
     for (size_t i = n - 1; i > 0; --i)
     {
         size_t j = rngBounded(&rng, i + 1);
@@ -43,9 +41,13 @@ void rngShuffleIndices(size_t *indices, size_t n, uint32_t seed)
 }
 
 void rngSetSeed(uint32_t seed) {
-    rng.state = seed;
+    rng.state = seed ? seed : 1;
 }
 
-uint32_t rngGetSeed() {
+uint32_t rngGetSeed(void) {
     return rng.state;
+}
+
+float rngNextFloat(void) {
+    return (float)(rngNext(&rng) >> 8) / (float)(1 << 24);
 }
\ No newline at end of file
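For reference on the Softmax.c hunks above: the deleted code materialized the full softmax Jacobian J[i][j] = s_i * (delta_ij - s_j) as an inputSize x inputSize VLA and multiplied it by the upstream gradient; the new code uses the algebraically identical closed form dL/dx_i = s_i * (dL/ds_i - sum_j s_j * dL/ds_j), which runs in O(n) and needs no n^2 scratch buffer. A small standalone check of that identity on toy values (not project code):

    #include <stdio.h>

    /* Verify that the dot-product form equals the explicit Jacobian product
       for a softmax output s and upstream gradient g (3-element example). */
    int main(void) {
        float s[3] = {0.2f, 0.3f, 0.5f};   /* assumed softmax output, sums to 1 */
        float g[3] = {1.0f, -2.0f, 0.5f};

        float dot = 0.0f;
        for (int i = 0; i < 3; i++) dot += s[i] * g[i];

        for (int i = 0; i < 3; i++) {
            /* Explicit Jacobian row: J[i][j] = s[i]*(1 - s[i]) if i == j else -s[i]*s[j] */
            float viaJacobian = 0.0f;
            for (int j = 0; j < 3; j++) {
                float jij = (i == j) ? s[i] * (1.0f - s[i]) : -s[i] * s[j];
                viaJacobian += jij * g[j];
            }
            float viaDot = s[i] * (g[i] - dot);
            printf("%f %f\n", viaJacobian, viaDot);   /* pairs match up to float rounding */
        }
        return 0;
    }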
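The rngNextFloat() added in src/rng/RNG.c above keeps the top 24 bits of the 32-bit generator output and divides by 2^24, so every result lies on a uniform grid in [0, 1), is exactly representable in a float's 24-bit mantissa, and stays strictly below 1.0f. A minimal sketch of that mapping; nextU32 below is a stand-in LCG for illustration, not the project's rngNext():

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder 32-bit generator for illustration only. */
    static uint32_t nextU32(uint32_t *state) {
        *state = *state * 747796405u + 2891336453u;
        return *state;
    }

    /* Same mapping as rngNextFloat(): top 24 bits -> [0, 1). */
    static float toUnitFloat(uint32_t x) {
        return (float)(x >> 8) / (float)(1 << 24);
    }

    int main(void) {
        uint32_t state = 42;
        for (int i = 0; i < 4; i++) {
            float f = toUnitFloat(nextU32(&state));
            printf("%.8f\n", f);   /* always >= 0.0f and strictly < 1.0f */
        }
        return 0;
    }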
diff --git a/src/rng/include/RNG.h b/src/rng/include/RNG.h
index 52ab048..8c0d940 100644
--- a/src/rng/include/RNG.h
+++ b/src/rng/include/RNG.h
@@ -9,10 +9,15 @@ typedef struct {
     uint32_t state;
 } rng32_t;
 
-void rngShuffleIndices(size_t *indices, size_t n, uint32_t seed);
+// NOTE: not thread-safe — all functions below use module-global RNG state.
+// When multi-threading support is added, migrate to context-passing variants.
+
+void rngShuffleIndices(size_t *indices, size_t n);
 
 void rngSetSeed(uint32_t seed);
 
-uint32_t rngGetSeed();
+uint32_t rngGetSeed(void);
+
+float rngNextFloat(void);
 
 #endif //RNG_H
diff --git a/src/userApi/layer/CMakeLists.txt b/src/userApi/layer/CMakeLists.txt
index 660eabc..724135a 100644
--- a/src/userApi/layer/CMakeLists.txt
+++ b/src/userApi/layer/CMakeLists.txt
@@ -2,6 +2,7 @@ add_library(LinearApi LinearApi.c)
 target_include_directories(LinearApi PUBLIC include)
 target_link_libraries(LinearApi PRIVATE
         Tensor
+        TensorApi
         Rounding
         Layer
         Linear
diff --git a/src/userApi/layer/LinearApi.c b/src/userApi/layer/LinearApi.c
index 8fde212..d73438e 100644
--- a/src/userApi/layer/LinearApi.c
+++ b/src/userApi/layer/LinearApi.c
@@ -5,6 +5,7 @@
 #include "Layer.h"
 #include "Tensor.h"
 #include "StorageApi.h"
+#include "TensorApi.h"
 #include "LinearApi.h"
 #include "Linear.h"
 
@@ -38,10 +39,8 @@ layer_t *linearLayerInitNonTrainable(tensor_t *weights, tensor_t *bias, quantiza
     linearConfig_t *linearConfig = *reserveMemory(sizeof(linearConfig_t));
     layerConfig->linear = linearConfig;
 
-    linearConfig->weights->param = weights;
-    linearConfig->weights->grad = NULL;
-    linearConfig->bias->param = bias;
-    linearConfig->bias->grad = NULL;
+    linearConfig->weights = parameterInit(weights, NULL);
+    linearConfig->bias = parameterInit(bias, NULL);
 
     linearConfig->forwardQ = forwardQ;
     linearLayer->config = layerConfig;
diff --git a/src/userApi/tensor/TensorApi.c b/src/userApi/tensor/TensorApi.c
index 58962b8..e9d5033 100644
--- a/src/userApi/tensor/TensorApi.c
+++ b/src/userApi/tensor/TensorApi.c
@@ -93,7 +93,9 @@ tensor_t *tensorInitWithDistribution(distributionType_t distributionType, float
         memset(data, 0, numberOfValues * sizeof(float));
         break;
     case ONES:
-        memset(data, 1, numberOfValues * sizeof(float));
+        for (size_t i = 0; i < numberOfValues; i++) {
+            data[i] = 1.0f;
+        }
         break;
     case NORMAL:
         for (size_t i = 0; i < numberOfValues; i++) {
@@ -106,27 +108,23 @@
         }
         break;
     case XAVIER_NORMAL:
-        float xavierStd = sqrtf(6.0f / (float)(inputFeatures + outputFeatures));
         for (size_t i = 0; i < numberOfValues; i++) {
-            data[i] = randomNormal(0.0f, xavierStd);
+            data[i] = xavierNormal(1.0f, inputFeatures, outputFeatures);
         }
         break;
     case XAVIER_UNIFORM:
-        float xavierLimit = sqrtf(6.0f / (float)(inputFeatures + outputFeatures));
         for (size_t i = 0; i < numberOfValues; i++) {
-            data[i] = randomUniform(-xavierLimit, xavierLimit);
+            data[i] = xavierUniform(1.0f, inputFeatures, outputFeatures);
         }
         break;
-    case HE_NORMAL:
-        float heStd = sqrtf(2.0f / (float)inputFeatures);
+    case KAIMING_NORMAL:
         for (size_t i = 0; i < numberOfValues; i++) {
-            data[i] = randomNormal(0.0f, heStd);
+            data[i] = kaimingNormal(sqrtf(2.0f), inputFeatures);
         }
         break;
-    case HE_UNIFORM:
-        float heLimit = sqrtf(2.0f / (float)inputFeatures);
+    case KAIMING_UNIFORM:
         for (size_t i = 0; i < numberOfValues; i++) {
-            data[i] = randomNormal(-heLimit, heLimit);
+            data[i] = kaimingUniform(sqrtf(2.0f), inputFeatures);
         }
         break;
     default:
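On the TensorApi.c ONES case above: memset(data, 1, numberOfValues * sizeof(float)) sets every byte of the buffer to 0x01, so each element ends up as the float with bit pattern 0x01010101 (about 2.4e-38), not 1.0f; only a zero fill happens to be a valid float fill, which is why the ZEROS case keeps memset while ONES now uses an explicit loop. A tiny standalone demonstration of the difference:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        float viaMemset[4];
        float viaLoop[4];

        memset(viaMemset, 1, sizeof viaMemset);        /* sets every BYTE to 0x01 */
        for (int i = 0; i < 4; i++) viaLoop[i] = 1.0f;

        /* viaMemset[0] has bit pattern 0x01010101, on the order of 1e-38, not 1.0f */
        printf("%g vs %g\n", viaMemset[0], viaLoop[0]);
        return 0;
    }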
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 844f856..ff2322b 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(data_loader)
 add_subdirectory(layer)
 add_subdirectory(loss_functions)
 add_subdirectory(optimizer)
+add_subdirectory(rng)
 add_subdirectory(serial)
 add_subdirectory(tensor)
 add_subdirectory(userAPI)
diff --git a/test/unit/arithmetic/CMakeLists.txt b/test/unit/arithmetic/CMakeLists.txt
index 772682d..92398c8 100644
--- a/test/unit/arithmetic/CMakeLists.txt
+++ b/test/unit/arithmetic/CMakeLists.txt
@@ -30,6 +30,7 @@
 add_elastic_ai_unit_test(
         Distributions
     MORE_LIBS
         MinMax
+        RNG
         Common
 )
diff --git a/test/unit/arithmetic/UnitTestDistributions.c b/test/unit/arithmetic/UnitTestDistributions.c
index c241895..caa6714 100644
--- a/test/unit/arithmetic/UnitTestDistributions.c
+++ b/test/unit/arithmetic/UnitTestDistributions.c
@@ -1,9 +1,9 @@
 #include <math.h>
-#include <stdlib.h>
 #include <stddef.h>
 
 #include "unity.h"
 #include "Distributions.h"
+#include "RNG.h"
 #include "MinMax.h"
 #include "Common.h"
 
@@ -31,7 +31,7 @@ void testRandomUniform() {
     const size_t n = 10000;
     float samples[n];
 
-    srand(42);
+    rngSetSeed(42);
 
     float min_val = -0.5f;
     float max_val = 0.5f;
@@ -57,7 +57,7 @@ void testRandomNormal() {
     const size_t n = 10000;
     float samples[n];
 
-    srand(42);
+    rngSetSeed(42);
 
     float expected_mean = 0.0f;
     float expected_std = 0.1f;
@@ -92,7 +92,7 @@ void testKaimingNormal() {
     const size_t n = 10000;
     const size_t fan_in = 784;
     float samples[n];
-    srand(42);
+    rngSetSeed(42);
     for (size_t i = 0; i < n; i++) {
         samples[i] = kaimingNormal(1, fan_in);
     }
@@ -107,9 +107,9 @@ void testKaimingNormal() {
     TEST_ASSERT_FLOAT_WITHIN(0.01f, 0.0f, mean);
     TEST_ASSERT_FLOAT_WITHIN(0.01f, expected_std, std);
 
-    float range_3sigma = 3.0f * expected_std;
-    TEST_ASSERT_TRUE(min > -range_3sigma * 1.5f);
-    TEST_ASSERT_TRUE(max < range_3sigma * 1.5f);
+    float range_5sigma = 5.0f * expected_std;
+    TEST_ASSERT_TRUE(min > -range_5sigma);
+    TEST_ASSERT_TRUE(max < range_5sigma);
 }
 
 void testKaimingUniform() {
@@ -117,7 +117,7 @@ void testKaimingUniform() {
     const size_t n = 10000;
     const size_t fan_in = 784;
     float samples[n];
-    srand(42);
+    rngSetSeed(42);
     for (size_t i = 0; i < n; i++) {
         samples[i] = kaimingUniform(1, fan_in);
     }
@@ -143,7 +143,7 @@ void testXavierNormal() {
     const size_t fan_in = 256;
     const size_t fan_out = 128;
     float samples[n];
-    srand(42);
+    rngSetSeed(42);
     for (size_t i = 0; i < n; i++) {
         samples[i] = xavierNormal(1, fan_in, fan_out);
     }
@@ -158,9 +158,9 @@ void testXavierNormal() {
     TEST_ASSERT_FLOAT_WITHIN(0.01f, 0.0f, mean);
     TEST_ASSERT_FLOAT_WITHIN(0.01f, expected_std, std);
 
-    float range_3sigma = 3.0f * expected_std;
-    TEST_ASSERT_TRUE(min > -range_3sigma * 1.5f);
-    TEST_ASSERT_TRUE(max < range_3sigma * 1.5f);
+    float range_5sigma = 5.0f * expected_std;
+    TEST_ASSERT_TRUE(min > -range_5sigma);
+    TEST_ASSERT_TRUE(max < range_5sigma);
 }
 
 void testXavierUniform() {
@@ -169,7 +169,7 @@ void testXavierUniform() {
     const size_t fan_in = 256;
     const size_t fan_out = 128;
     float samples[n];
-    srand(42);
+    rngSetSeed(42);
     for (size_t i = 0; i < n; i++) {
         samples[i] = xavierUniform(1, fan_in, fan_out);
     }
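A possible reading of the 3-sigma to 5-sigma change in UnitTestDistributions.c above (the diff itself gives no rationale): the old bound of 3.0 * std * 1.5 = 4.5 sigma sits close to the largest value one expects among 10,000 normal draws, while 5 sigma leaves clear headroom; with the fixed seed the assertion is deterministic either way, but the wider bound stays robust if the seed or generator changes. A quick tail-probability calculation, standalone and linked against -lm:

    #include <math.h>
    #include <stdio.h>

    /* Chance that at least one of n standard-normal draws lands outside +/- k sigma. */
    static double pAnyOutside(double k, double n) {
        double perSample = erfc(k / sqrt(2.0));   /* P(|Z| > k) */
        return 1.0 - pow(1.0 - perSample, n);
    }

    int main(void) {
        printf("4.5 sigma: %.3f\n", pAnyOutside(4.5, 10000.0));   /* ~0.066 */
        printf("5.0 sigma: %.4f\n", pAnyOutside(5.0, 10000.0));   /* ~0.006 */
        return 0;
    }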
diff --git a/test/unit/data_loader/UnitTestDataLoader.c b/test/unit/data_loader/UnitTestDataLoader.c
index a646dd3..05fb09b 100644
--- a/test/unit/data_loader/UnitTestDataLoader.c
+++ b/test/unit/data_loader/UnitTestDataLoader.c
@@ -92,7 +92,8 @@ void testShuffle() {
     size_t numberOfIndices = 10;
     size_t indices[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
 
-    rngShuffleIndices(indices, 10, 1);
+    rngSetSeed(1);
+    rngShuffleIndices(indices, 10);
 
     /*for (size_t i = 0; i < numberOfIndices; i++) {
         printf("%lu\n", indices[i]);
diff --git a/test/unit/layer/UnitTestLinear.c b/test/unit/layer/UnitTestLinear.c
index 910d4cd..0be87c3 100644
--- a/test/unit/layer/UnitTestLinear.c
+++ b/test/unit/layer/UnitTestLinear.c
@@ -439,6 +439,42 @@ void testLinearBackwardFloatWithMismatchedQuantizations() {
     }
 }
 
+void testLinearLayerInitNonTrainable(void) {
+    float weightData[] = {-1.f, 2.f, -3.f, 4.f, 5.f, -6.f};
+    size_t weightDims[] = {2, 3};
+    tensor_t *weights = tensorInitFloat(weightData, weightDims, 2, NULL);
+
+    float biasData[] = {-1.f, 3.f};
+    size_t biasDims[] = {1, 2};
+    tensor_t *bias = tensorInitFloat(biasData, biasDims, 2, NULL);
+
+    quantization_t *forwardQ = quantizationInitFloat();
+
+    layer_t *layer = linearLayerInitNonTrainable(weights, bias, forwardQ);
+
+    TEST_ASSERT_NOT_NULL(layer);
+    TEST_ASSERT_EQUAL(LINEAR, layer->type);
+
+    linearConfig_t *config = layer->config->linear;
+    TEST_ASSERT_NULL(config->weights->grad);
+    TEST_ASSERT_NULL(config->bias->grad);
+
+    // Forward pass should work
+    float inputData[] = {0.f, 1.f, 2.f};
+    size_t inputDims[] = {1, 3};
+    tensor_t *input = tensorInitFloat(inputData, inputDims, 2, NULL);
+
+    float outputData[2];
+    size_t outputDims[] = {1, 2};
+    tensor_t *output = tensorInitFloat(outputData, outputDims, 2, NULL);
+
+    linearForward(layer, input, output);
+
+    float *actual = (float *)output->data;
+    TEST_ASSERT_FLOAT_WITHIN(0.001f, -5.f, actual[0]);
+    TEST_ASSERT_FLOAT_WITHIN(0.001f, -4.f, actual[1]);
+}
+
 int main(void) {
     UNITY_BEGIN();
     RUN_TEST(testLinearForwardFloat);
@@ -448,5 +484,6 @@ int main(void) {
     RUN_TEST(testLinearBackwardSymInt32);
     RUN_TEST(testLinearBackwardFloatWithMismatchedQuantizations);
+    RUN_TEST(testLinearLayerInitNonTrainable);
     return UNITY_END();
 }
diff --git a/test/unit/loss_functions/UnitTestMSE.c b/test/unit/loss_functions/UnitTestMSE.c
index 688e220..062771a 100644
--- a/test/unit/loss_functions/UnitTestMSE.c
+++ b/test/unit/loss_functions/UnitTestMSE.c
@@ -87,7 +87,7 @@ void testMSELossBackwardSymInt32() {
     initSymInt32QConfig(HTE, &modelOutputSymInt32QC);
     quantization_t modelOutputSymInt32Q;
     initSymInt32Quantization(&modelOutputSymInt32QC, &modelOutputSymInt32Q);
-    uint8_t modelOutputSymInt32Data[numberOfElements];
+    uint8_t modelOutputSymInt32Data[numberOfElements * sizeof(int32_t)];
     setTensorValuesForConversion(modelOutputSymInt32Data, &modelOutputSymInt32Q, &modelOutput, &modelOutputSymInt32);
     convertTensor(&modelOutput, &modelOutputSymInt32);
 
@@ -102,7 +102,7 @@
     initSymInt32QConfig(HTE, &labelSymInt32QC);
     quantization_t labelSymInt32Q;
     initSymInt32Quantization(&labelSymInt32QC, &labelSymInt32Q);
-    uint8_t labelSymInt32Data[numberOfElements];
+    uint8_t labelSymInt32Data[numberOfElements * sizeof(int32_t)];
     setTensorValuesForConversion(labelSymInt32Data, &labelSymInt32Q, &label, &labelSymInt32);
     convertTensor(&label, &labelSymInt32);
 
@@ -118,7 +118,7 @@
     initSymInt32QConfig(HTE, &resultSymInt32QC);
     quantization_t resultSymInt32Q;
     initSymInt32Quantization(&resultSymInt32QC, &resultSymInt32Q);
-    uint8_t resultSymInt32Data[numberOfElements];
+    uint8_t resultSymInt32Data[numberOfElements * sizeof(int32_t)];
     setTensorValuesForConversion(resultSymInt32Data, &resultSymInt32Q, &result, &resultSymInt32);
     convertTensor(&result, &resultSymInt32);
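The UnitTestMSE.c changes above fix undersized conversion buffers: a SymInt32 tensor needs sizeof(int32_t) bytes per element, so a uint8_t array of length numberOfElements covered only a quarter of the required storage and the conversion wrote past its end. A minimal sketch of the size arithmetic; the plain memcpy stands in for the conversion and is only an assumption about how much the target buffer must hold:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        enum { numberOfElements = 4 };
        int32_t values[numberOfElements] = {1, -2, 3, -4};

        /* Backing storage for a SymInt32 tensor: one int32_t (4 bytes) per element. */
        uint8_t rightSize[numberOfElements * sizeof(int32_t)];   /* 16 bytes */
        /* uint8_t tooSmall[numberOfElements];                      4 bytes: the old, undersized buffer */

        memcpy(rightSize, values, sizeof values);                 /* fits exactly */
        printf("buffer bytes: %zu, payload bytes: %zu\n", sizeof rightSize, sizeof values);
        return 0;
    }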
diff --git a/test/unit/rng/CMakeLists.txt b/test/unit/rng/CMakeLists.txt
new file mode 100644
index 0000000..b62e004
--- /dev/null
+++ b/test/unit/rng/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_elastic_ai_unit_test(
+    LIB_UNDER_TEST
+        RNG
+)
diff --git a/test/unit/rng/UnitTestRNG.c b/test/unit/rng/UnitTestRNG.c
new file mode 100644
index 0000000..cb65724
--- /dev/null
+++ b/test/unit/rng/UnitTestRNG.c
@@ -0,0 +1,77 @@
+#include "unity.h"
+#include "RNG.h"
+
+void setUp(void) {}
+
+void tearDown(void) {}
+
+void testRngNextFloatInRange(void) {
+    rngSetSeed(42);
+    for (size_t i = 0; i < 10000; i++) {
+        float val = rngNextFloat();
+        TEST_ASSERT_TRUE(val >= 0.0f);
+        TEST_ASSERT_TRUE(val < 1.0f);
+    }
+}
+
+void testRngNextFloatDistribution(void) {
+    rngSetSeed(42);
+    const size_t n = 10000;
+    const size_t buckets = 10;
+    size_t counts[10] = {0};
+
+    for (size_t i = 0; i < n; i++) {
+        float val = rngNextFloat();
+        size_t bucket = (size_t)(val * buckets);
+        if (bucket >= buckets) bucket = buckets - 1;
+        counts[bucket]++;
+    }
+
+    // Each bucket should have ~1000 samples; allow 200 deviation
+    for (size_t b = 0; b < buckets; b++) {
+        TEST_ASSERT_INT_WITHIN(200, 1000, (int)counts[b]);
+    }
+}
+
+void testRngNextFloatReproducible(void) {
+    float first[100];
+    float second[100];
+
+    rngSetSeed(123);
+    for (size_t i = 0; i < 100; i++) {
+        first[i] = rngNextFloat();
+    }
+
+    rngSetSeed(123);
+    for (size_t i = 0; i < 100; i++) {
+        second[i] = rngNextFloat();
+    }
+
+    for (size_t i = 0; i < 100; i++) {
+        TEST_ASSERT_EQUAL_FLOAT(first[i], second[i]);
+    }
+}
+
+void testRngShuffleUsesGlobalState(void) {
+    size_t indices1[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    size_t indices2[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    rngSetSeed(42);
+    rngShuffleIndices(indices1, 10);
+
+    rngSetSeed(42);
+    rngShuffleIndices(indices2, 10);
+
+    for (size_t i = 0; i < 10; i++) {
+        TEST_ASSERT_EQUAL_UINT(indices1[i], indices2[i]);
+    }
+}
+
+int main(void) {
+    UNITY_BEGIN();
+    RUN_TEST(testRngNextFloatInRange);
+    RUN_TEST(testRngNextFloatDistribution);
+    RUN_TEST(testRngNextFloatReproducible);
+    RUN_TEST(testRngShuffleUsesGlobalState);
+    return UNITY_END();
+}
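Taken together, the diff routes all randomness through the seeded global generator in src/rng. A minimal usage sketch of the resulting API, assuming only the headers and link targets introduced above (illustrative, not a test from this change set):

    #include <stddef.h>
    #include <stdio.h>
    #include "RNG.h"
    #include "Distributions.h"

    int main(void) {
        rngSetSeed(42);                        /* one global seed; 0 is remapped to 1 internally */

        float w = randomNormal(0.0f, 0.1f);    /* Box-Muller on top of rngNextFloat() */
        float u = randomUniform(-0.5f, 0.5f);

        size_t idx[4] = {0, 1, 2, 3};
        rngShuffleIndices(idx, 4);             /* Fisher-Yates shuffle using the same global state */

        printf("%f %f %zu\n", w, u, idx[0]);   /* identical output on every run with the same seed */
        return 0;
    }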