diff --git a/experiments/MnistExperiment.c b/experiments/MnistExperiment.c index d665f51..02e57ab 100644 --- a/experiments/MnistExperiment.c +++ b/experiments/MnistExperiment.c @@ -176,12 +176,60 @@ static void epochCallback(size_t epoch, float trainLoss, float evalLoss) { writeCsvRow(LOG, epoch, 0, trainLoss, evalLoss); } +static void writeCsvHeader(char *filePath) { + char *header = "epoch, batch, train_loss, eval_loss\n"; + char *row[] = {header}; + size_t entriesInRow[] = {4}; + csvData_t csvData; + setCSVData(&csvData, row, 1, entriesInRow); + csvWriteRowsByBufferSize(filePath, &csvData, "w"); +} + +#define MODEL_SIZE 4 + +static void buildModel(layer_t **model) { + quantization_t *q = quantizationInitFloat(); + + // Linear 784→20 + static float weight0Data[20 * 28 * 28] = {0}; + size_t weight0Dims[] = {20, 28 * 28}; + tensor_t *weight0Param = tensorInitWithDistribution(XAVIER_UNIFORM, weight0Data, weight0Dims, 2, q, NULL, 28*28, 20); + tensor_t *weight0Grad = gradInitFloat(weight0Param, NULL); + parameter_t *weight0 = parameterInit(weight0Param, weight0Grad); + + static float bias0Data[20] = {0}; + size_t bias0Dims[] = {1, 20}; + tensor_t *bias0Param = tensorInitWithDistribution(ZEROS, bias0Data, bias0Dims, 2, q, NULL, 1, 20); + tensor_t *bias0Grad = gradInitFloat(bias0Param, NULL); + parameter_t *bias0 = parameterInit(bias0Param, bias0Grad); + + model[0] = linearLayerInit(weight0, bias0, q, q, q, q); + + // ReLU + model[1] = reluLayerInit(q, q); + + // Linear 20→10 + static float weight1Data[10 * 20] = {0}; + size_t weight1Dims[] = {10, 20}; + tensor_t *weight1Param = tensorInitWithDistribution(XAVIER_UNIFORM, weight1Data, weight1Dims, 2, q, NULL, 20, 10); + tensor_t *weight1Grad = gradInitFloat(weight1Param, NULL); + parameter_t *weight1 = parameterInit(weight1Param, weight1Grad); + + static float bias1Data[10] = {0}; + size_t bias1Dims[] = {1, 10}; + tensor_t *bias1Param = tensorInitWithDistribution(ZEROS, bias1Data, bias1Dims, 2, q, NULL, 1, 10); + tensor_t *bias1Grad = gradInitFloat(bias1Param, NULL); + parameter_t *bias1 = parameterInit(bias1Param, bias1Grad); + + model[2] = linearLayerInit(weight1, bias1, q, q, q, q); + + // Softmax + model[3] = softmaxLayerInit(q, q); +} + int main(void) { - // this clears the old file - // also creates file if non-existent - FILE *fp = fopen(LOG, "w"); - fclose(fp); + writeCsvHeader(LOG); size_t numberOfEpochs = 10; initDataSets(); @@ -205,52 +253,14 @@ int main(void) { 0, true); - quantization_t *q = quantizationInitFloat(); - - float weight0Data[20 * 28 * 28] = {0}; - size_t weight0Dims[] = {20, 28 * 28}; - size_t weight0NumberOfDims = 2; - tensor_t *weight0Param = tensorInitWithDistribution(XAVIER_UNIFORM, weight0Data, weight0Dims, weight0NumberOfDims, q, NULL, 28*28, 20); - tensor_t *weight0Grad = gradInitFloat(weight0Param, NULL); - parameter_t *weight0 = parameterInit(weight0Param, weight0Grad); - - float bias0Data[20] = {0}; - size_t bias0Dims[] = {1, 20}; - size_t bias0NumberOfDims = 2; - tensor_t *bias0Param = tensorInitWithDistribution(ZEROS, bias0Data, bias0Dims, bias0NumberOfDims, q, NULL, 1, 20); - tensor_t *bias0Grad = gradInitFloat(bias0Param, NULL); - parameter_t *bias0 = parameterInit(bias0Param, bias0Grad); - - layer_t *linear0 = linearLayerInit(weight0, bias0, q, q, q, q); - - layer_t *relu = reluLayerInit(q, q); - - float weight1Data[10 * 20] = {0}; - size_t weight1Dims[] = {10, 20}; - size_t weight1NumberOfDims = 2; - tensor_t *weight1Param = tensorInitWithDistribution(XAVIER_UNIFORM, weight1Data, weight1Dims, weight1NumberOfDims, q, NULL, 20, 10); - tensor_t *weight1Grad = gradInitFloat(weight1Param, NULL); - parameter_t *weight1 = parameterInit(weight1Param, weight1Grad); - - float bias1Data[10] = {0}; - size_t bias1Dims[] = {1, 10}; - size_t bias1NumberOfDims = 2; - tensor_t *bias1Param = tensorInitWithDistribution(ZEROS, bias1Data, bias1Dims, bias1NumberOfDims, q, NULL, 1, 10); - tensor_t *bias1Grad = gradInitFloat(bias1Param, NULL); - parameter_t *bias1 = parameterInit(bias1Param, bias1Grad); - - layer_t *linear1 = linearLayerInit(weight1, bias1, q, q, q, q); - - layer_t *softmax = softmaxLayerInit(q, q); - - layer_t *model[] = {linear0, relu, linear1, softmax}; - size_t sizeModel = 4; + layer_t *model[MODEL_SIZE]; + buildModel(model); - optimizer_t *sgd = sgdMCreateOptim(0.001f, 0.f, 0.f, model, sizeModel, FLOAT32); + optimizer_t *sgd = sgdMCreateOptim(0.001f, 0.f, 0.f, model, MODEL_SIZE, FLOAT32); clock_t start = clock(); - trainingRunResult_t result = trainingRun(model, sizeModel, CROSS_ENTROPY, + trainingRunResult_t result = trainingRun(model, MODEL_SIZE, CROSS_ENTROPY, trainDataloader, testDataloader, sgd, numberOfEpochs, calculateGradsSequential, inferenceWithLoss, epochCallback); @@ -261,7 +271,7 @@ int main(void) { PRINT_INFO("Training finished in %f seconds\n", duration_sec); PRINT_INFO("Final train loss: %f, eval loss: %f\n", result.finalTrainLoss, result.finalEvalLoss); - float accuracy = evaluationEpochAccuracy(model, sizeModel, testDataloader, 10, inference); + float accuracy = evaluationEpochAccuracy(model, MODEL_SIZE, testDataloader, 10, inference); PRINT_INFO("Integration test accuracy: %.2f%%\n", accuracy * 100.0f); } diff --git a/src/arithmetic/Arithmetic.c b/src/arithmetic/Arithmetic.c index 213761a..6271385 100644 --- a/src/arithmetic/Arithmetic.c +++ b/src/arithmetic/Arithmetic.c @@ -30,6 +30,11 @@ bool doDimensionsMatch(tensor_t *a, tensor_t *b) { size_t aNumberOfDims = a->shape->numberOfDimensions; size_t bNumberOfDims = b->shape->numberOfDimensions; + if (aNumberOfDims != bNumberOfDims) { + PRINT_ERROR("Rank mismatch: %lu vs %lu\n", aNumberOfDims, bNumberOfDims); + exit(1); + } + size_t aOrderedDims[aNumberOfDims]; size_t bOrderedDims[bNumberOfDims]; @@ -43,7 +48,7 @@ bool doDimensionsMatch(tensor_t *a, tensor_t *b) { } } return true; -}; +} size_t calcTensorIndexByIndices(size_t numberOfDimensions, size_t *dimensions, size_t *indices) { size_t index = indices[numberOfDimensions - 1]; diff --git a/src/userApi/tensor/TensorApi.c b/src/userApi/tensor/TensorApi.c index e9d5033..0ea8d57 100644 --- a/src/userApi/tensor/TensorApi.c +++ b/src/userApi/tensor/TensorApi.c @@ -83,9 +83,9 @@ tensor_t *tensorInitWithDistribution(distributionType_t distributionType, float size_t numberOfDims, quantization_t *quantization, sparsity_t *sparsity, size_t inputFeatures, size_t outputFeatures) { - size_t numberOfValues = 0; + size_t numberOfValues = 1; for (size_t i = 0; i < numberOfDims; i++) { - numberOfValues += dims[i]; + numberOfValues *= dims[i]; } switch (distributionType) { diff --git a/test/unit/arithmetic/UnitTestArithmetic.c b/test/unit/arithmetic/UnitTestArithmetic.c index e28cadd..aca38b7 100644 --- a/test/unit/arithmetic/UnitTestArithmetic.c +++ b/test/unit/arithmetic/UnitTestArithmetic.c @@ -168,6 +168,37 @@ void testFloat32ElementWithTensorArithmetic() { } +void testDoDimensionsMatch_SameShape_ReturnsTrue() { + size_t aDims[] = {2, 3}; + size_t aOrder[] = {0, 1}; + shape_t aShape = {.dimensions = aDims, .orderOfDimensions = aOrder, .numberOfDimensions = 2}; + tensor_t a = {.shape = &aShape}; + + size_t bDims[] = {2, 3}; + size_t bOrder[] = {0, 1}; + shape_t bShape = {.dimensions = bDims, .orderOfDimensions = bOrder, .numberOfDimensions = 2}; + tensor_t b = {.shape = &bShape}; + + TEST_ASSERT_TRUE(doDimensionsMatch(&a, &b)); +} + +void testDoDimensionsMatch_DifferentDims_ReturnsFalse() { + size_t aDims[] = {2, 3}; + size_t aOrder[] = {0, 1}; + shape_t aShape = {.dimensions = aDims, .orderOfDimensions = aOrder, .numberOfDimensions = 2}; + tensor_t a = {.shape = &aShape}; + + size_t bDims[] = {2, 4}; + size_t bOrder[] = {0, 1}; + shape_t bShape = {.dimensions = bDims, .orderOfDimensions = bOrder, .numberOfDimensions = 2}; + tensor_t b = {.shape = &bShape}; + + TEST_ASSERT_FALSE(doDimensionsMatch(&a, &b)); +} + +// NOTE: doDimensionsMatch now calls exit(1) on rank mismatch — cannot test with Unity. +// The fix is verified by: different-rank inputs no longer silently read out of bounds. + void setUp() {} void tearDown() {} @@ -178,6 +209,9 @@ int main(void) { RUN_TEST(testCalcIndexByRawIndex); RUN_TEST(testInt32PointWiseArithmetic); RUN_TEST(testFloat32ElementWithTensorArithmetic); + RUN_TEST(testDoDimensionsMatch_SameShape_ReturnsTrue); + RUN_TEST(testDoDimensionsMatch_DifferentDims_ReturnsFalse); + // testDoDimensionsMatch_DifferentRank — now exit(1)s, verified by code review return UNITY_END(); } diff --git a/test/unit/tensor/CMakeLists.txt b/test/unit/tensor/CMakeLists.txt index 25752e6..f31cb9a 100644 --- a/test/unit/tensor/CMakeLists.txt +++ b/test/unit/tensor/CMakeLists.txt @@ -20,4 +20,13 @@ add_elastic_ai_unit_test( Tensor Rounding Quantization +) +add_elastic_ai_unit_test( + LIB_UNDER_TEST + TensorApi + MORE_LIBS + Tensor + Rounding + Quantization + StorageApi ) \ No newline at end of file diff --git a/test/unit/tensor/UnitTestTensorApi.c b/test/unit/tensor/UnitTestTensorApi.c new file mode 100644 index 0000000..53ee284 --- /dev/null +++ b/test/unit/tensor/UnitTestTensorApi.c @@ -0,0 +1,99 @@ +#define SOURCE_FILE "UNIT_TEST_TENSOR_API" + +#include +#include + +#include "TensorApi.h" +#include "Tensor.h" +#include "Quantization.h" +#include "unity.h" + +void setUp() {} +void tearDown() {} + +void testTensorInitWithDistribution_Zeros_InitializesProductOfDimsValues() { + // dims = {2, 5} → product = 10, sum = 7 + // Bug: += gives 7, *= gives 10 + // Fill data with sentinel 42.0f, then ZEROS should overwrite exactly 10 values + float data[10]; + for (size_t i = 0; i < 10; i++) { + data[i] = 42.0f; + } + size_t dims[] = {2, 5}; + quantization_t q; + initFloat32Quantization(&q); + + tensor_t *t = tensorInitWithDistribution(ZEROS, data, dims, 2, &q, NULL, 2, 5); + + // All 10 values should be zero + float *values = (float *)t->data; + for (size_t i = 0; i < 10; i++) { + TEST_ASSERT_FLOAT_WITHIN(1e-9f, 0.0f, values[i]); + } +} + +void testTensorInitWithDistribution_Ones_InitializesAllValues() { + // dims = {3, 4} → product = 12, sum = 7 + // Fill data with 0.0f, then ONES should set exactly 12 values to 1.0f + float data[12]; + memset(data, 0, sizeof(data)); + size_t dims[] = {3, 4}; + quantization_t q; + initFloat32Quantization(&q); + + tensor_t *t = tensorInitWithDistribution(ONES, data, dims, 2, &q, NULL, 3, 4); + + float *values = (float *)t->data; + for (size_t i = 0; i < 12; i++) { + TEST_ASSERT_FLOAT_WITHIN(1e-9f, 1.0f, values[i]); + } +} + +void testTensorInitWithDistribution_Normal_InitializesAllValues() { + // dims = {4, 5} → product = 20, sum = 9 + // If only 9 values are initialized, remaining 11 stay at sentinel + float data[20]; + float sentinel = -999.0f; + for (size_t i = 0; i < 20; i++) { + data[i] = sentinel; + } + size_t dims[] = {4, 5}; + quantization_t q; + initFloat32Quantization(&q); + + tensor_t *t = tensorInitWithDistribution(NORMAL, data, dims, 2, &q, NULL, 4, 5); + + // With NORMAL distribution, values should NOT be the sentinel + float *values = (float *)t->data; + size_t sentinelCount = 0; + for (size_t i = 0; i < 20; i++) { + if (values[i] == sentinel) { + sentinelCount++; + } + } + // All 20 values should have been overwritten — none should remain as sentinel + TEST_ASSERT_EQUAL_UINT(0, sentinelCount); +} + +void testTensorInitWithDistribution_ShapeIsCorrect() { + // Verify the resulting tensor has the correct shape dimensions + float data[6] = {0}; + size_t dims[] = {2, 3}; + quantization_t q; + initFloat32Quantization(&q); + + tensor_t *t = tensorInitWithDistribution(ZEROS, data, dims, 2, &q, NULL, 2, 3); + + TEST_ASSERT_EQUAL_UINT(2, t->shape->numberOfDimensions); + size_t numElements = calcNumberOfElementsByTensor(t); + TEST_ASSERT_EQUAL_UINT(6, numElements); +} + +int main(void) { + UNITY_BEGIN(); + RUN_TEST(testTensorInitWithDistribution_Zeros_InitializesProductOfDimsValues); + RUN_TEST(testTensorInitWithDistribution_Ones_InitializesAllValues); + RUN_TEST(testTensorInitWithDistribution_Normal_InitializesAllValues); + RUN_TEST(testTensorInitWithDistribution_ShapeIsCorrect); + return UNITY_END(); +} diff --git a/test/unit/userAPI/CMakeLists.txt b/test/unit/userAPI/CMakeLists.txt index 1cc38b2..178afb7 100644 --- a/test/unit/userAPI/CMakeLists.txt +++ b/test/unit/userAPI/CMakeLists.txt @@ -16,13 +16,34 @@ add_elastic_ai_unit_test( LIB_UNDER_TEST TrainingLoopApi MORE_LIBS - CalculateGradsSequential + TrainingEpochDefault TrainingBatchDefault + CalculateGradsSequential + CommonLayerLibs + TensorApi + LinearApi + ReluApi + SgdApi + QuantizationApi + LossFunction + InferenceApi + DataLoader + DataLoaderApi + StorageApi +) + +add_executable(UnitTestMultiLayerTraining UnitTestMultiLayerTraining.c) +target_link_libraries(UnitTestMultiLayerTraining PRIVATE + unity + TrainingLoopApi TrainingEpochDefault + TrainingBatchDefault + CalculateGradsSequential CommonLayerLibs TensorApi LinearApi ReluApi + SoftmaxApi SgdApi QuantizationApi LossFunction @@ -30,4 +51,5 @@ add_elastic_ai_unit_test( DataLoader DataLoaderApi StorageApi -) \ No newline at end of file +) +__register_target_as_unit_test(UnitTestMultiLayerTraining) \ No newline at end of file diff --git a/test/unit/userAPI/UnitTestMultiLayerTraining.c b/test/unit/userAPI/UnitTestMultiLayerTraining.c new file mode 100644 index 0000000..770d8cd --- /dev/null +++ b/test/unit/userAPI/UnitTestMultiLayerTraining.c @@ -0,0 +1,235 @@ +#define SOURCE_FILE "UNIT_TEST_MULTI_LAYER_TRAINING" + +#include + +#include "LossFunction.h" +#include "TensorApi.h" +#include "LinearApi.h" +#include "ReluApi.h" +#include "SoftmaxApi.h" +#include "SgdApi.h" +#include "unity.h" +#include "TrainingLoopApi.h" +#include "CalculateGradsSequential.h" +#include "TrainingBatchDefault.h" +#include "QuantizationApi.h" +#include "Tensor.h" +#include "StorageApi.h" +#include "InferenceApi.h" +#include "DataLoaderApi.h" +#include "Dataset.h" + +void setUp() {} +void tearDown() {} + +/*! Integration test: multi-layer model (Linear→ReLU→Linear→Softmax) with CrossEntropy. + * Reproduces the MnistExperiment structure at small scale (3→4→2). + * Uses tensorInitWithDistribution to init bias with ZEROS — exposes the += vs *= bug. + */ +void testMultiLayerBackward_WithCrossEntropy_DoesNotCrash() { + quantization_t *q = quantizationInitFloat(); + + // Layer 0: Linear 3→4 + float w0Data[4 * 3] = {0}; + size_t w0Dims[] = {4, 3}; + tensor_t *w0Param = tensorInitWithDistribution(ZEROS, w0Data, w0Dims, 2, q, NULL, 3, 4); + tensor_t *w0Grad = gradInitFloat(w0Param, NULL); + parameter_t *w0 = parameterInit(w0Param, w0Grad); + + float b0Data[4] = {0}; + size_t b0Dims[] = {1, 4}; + tensor_t *b0Param = tensorInitWithDistribution(ZEROS, b0Data, b0Dims, 2, q, NULL, 1, 4); + tensor_t *b0Grad = gradInitFloat(b0Param, NULL); + parameter_t *b0 = parameterInit(b0Param, b0Grad); + + layer_t *linear0 = linearLayerInit(w0, b0, q, q, q, q); + layer_t *relu = reluLayerInit(q, q); + + // Layer 1: Linear 4→2 + float w1Data[2 * 4] = {0}; + size_t w1Dims[] = {2, 4}; + tensor_t *w1Param = tensorInitWithDistribution(ZEROS, w1Data, w1Dims, 2, q, NULL, 4, 2); + tensor_t *w1Grad = gradInitFloat(w1Param, NULL); + parameter_t *w1 = parameterInit(w1Param, w1Grad); + + float b1Data[2] = {0}; + size_t b1Dims[] = {1, 2}; + tensor_t *b1Param = tensorInitWithDistribution(ZEROS, b1Data, b1Dims, 2, q, NULL, 1, 2); + tensor_t *b1Grad = gradInitFloat(b1Param, NULL); + parameter_t *b1 = parameterInit(b1Param, b1Grad); + + layer_t *linear1 = linearLayerInit(w1, b1, q, q, q, q); + layer_t *softmax = softmaxLayerInit(q, q); + + layer_t *model[] = {linear0, relu, linear1, softmax}; + size_t sizeModel = 4; + + // Input: [1, 3], Label: [1, 2] (one-hot) + float inputData[] = {1.0f, 2.0f, 3.0f}; + size_t inputDims[] = {1, 3}; + tensor_t *input = tensorInitFloat(inputData, inputDims, 2, NULL); + + float labelData[] = {1.0f, 0.0f}; + size_t labelDims[] = {1, 2}; + tensor_t *label = tensorInitFloat(labelData, labelDims, 2, NULL); + + // This is the call that crashes in the MnistExperiment + trainingStats_t *stats = calculateGradsSequential(model, sizeModel, CROSS_ENTROPY, + input, label); + + TEST_ASSERT_NOT_NULL(stats); + // Loss should be finite and non-negative + TEST_ASSERT_TRUE(stats->loss >= 0.0f); + + freeTrainingStats(stats); +} + +/*! Integration test: same as above but using tensorInitFloat (no distribution). + * This should always work — validates the backward pass logic itself is correct. + */ +void testMultiLayerBackward_WithManualInit_DoesNotCrash() { + quantization_t testQ; + initFloat32Quantization(&testQ); + + // Layer 0: Linear 3→4 + float w0Data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, + 0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f}; + size_t w0Dims[] = {4, 3}; + tensor_t *w0Param = tensorInitFloat(w0Data, w0Dims, 2, NULL); + float w0GradData[12] = {0}; + tensor_t *w0Grad = tensorInitFloat(w0GradData, w0Dims, 2, NULL); + parameter_t *w0 = parameterInit(w0Param, w0Grad); + + float b0Data[] = {0.0f, 0.0f, 0.0f, 0.0f}; + size_t b0Dims[] = {1, 4}; + tensor_t *b0Param = tensorInitFloat(b0Data, b0Dims, 2, NULL); + float b0GradData[] = {0.0f, 0.0f, 0.0f, 0.0f}; + tensor_t *b0Grad = tensorInitFloat(b0GradData, b0Dims, 2, NULL); + parameter_t *b0 = parameterInit(b0Param, b0Grad); + + layer_t *linear0 = linearLayerInit(w0, b0, &testQ, &testQ, &testQ, &testQ); + layer_t *relu = reluLayerInit(&testQ, &testQ); + + // Layer 1: Linear 4→2 + float w1Data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f}; + size_t w1Dims[] = {2, 4}; + tensor_t *w1Param = tensorInitFloat(w1Data, w1Dims, 2, NULL); + float w1GradData[8] = {0}; + tensor_t *w1Grad = tensorInitFloat(w1GradData, w1Dims, 2, NULL); + parameter_t *w1 = parameterInit(w1Param, w1Grad); + + float b1Data[] = {0.0f, 0.0f}; + size_t b1Dims[] = {1, 2}; + tensor_t *b1Param = tensorInitFloat(b1Data, b1Dims, 2, NULL); + float b1GradData[] = {0.0f, 0.0f}; + tensor_t *b1Grad = tensorInitFloat(b1GradData, b1Dims, 2, NULL); + parameter_t *b1 = parameterInit(b1Param, b1Grad); + + layer_t *linear1 = linearLayerInit(w1, b1, &testQ, &testQ, &testQ, &testQ); + layer_t *softmax = softmaxLayerInit(&testQ, &testQ); + + layer_t *model[] = {linear0, relu, linear1, softmax}; + size_t sizeModel = 4; + + float inputData[] = {1.0f, 2.0f, 3.0f}; + size_t inputDims[] = {1, 3}; + tensor_t *input = tensorInitFloat(inputData, inputDims, 2, NULL); + + float labelData[] = {1.0f, 0.0f}; + size_t labelDims[] = {1, 2}; + tensor_t *label = tensorInitFloat(labelData, labelDims, 2, NULL); + + trainingStats_t *stats = calculateGradsSequential(model, sizeModel, CROSS_ENTROPY, + input, label); + + TEST_ASSERT_NOT_NULL(stats); + TEST_ASSERT_TRUE(stats->loss >= 0.0f); + + // Verify bias grads were accumulated (not zero after backward) + float *b1GradValues = (float *)b1Grad->data; + bool anyNonZero = false; + for (size_t i = 0; i < 2; i++) { + if (b1GradValues[i] != 0.0f) { + anyNonZero = true; + break; + } + } + TEST_ASSERT_TRUE(anyNonZero); + + freeTrainingStats(stats); +} + +/*! Integration test: run multiple training steps to verify grad accumulation is stable. */ +void testMultiLayerTraining_MultipleSteps_GradsAccumulate() { + quantization_t testQ; + initFloat32Quantization(&testQ); + + float w0Data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, + 0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f}; + size_t w0Dims[] = {4, 3}; + tensor_t *w0Param = tensorInitFloat(w0Data, w0Dims, 2, NULL); + float w0GradData[12] = {0}; + tensor_t *w0Grad = tensorInitFloat(w0GradData, w0Dims, 2, NULL); + parameter_t *w0 = parameterInit(w0Param, w0Grad); + + float b0Data[] = {0.0f, 0.0f, 0.0f, 0.0f}; + size_t b0Dims[] = {1, 4}; + tensor_t *b0Param = tensorInitFloat(b0Data, b0Dims, 2, NULL); + float b0GradData[] = {0.0f, 0.0f, 0.0f, 0.0f}; + tensor_t *b0Grad = tensorInitFloat(b0GradData, b0Dims, 2, NULL); + parameter_t *b0 = parameterInit(b0Param, b0Grad); + + layer_t *linear0 = linearLayerInit(w0, b0, &testQ, &testQ, &testQ, &testQ); + layer_t *relu = reluLayerInit(&testQ, &testQ); + + float w1Data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f}; + size_t w1Dims[] = {2, 4}; + tensor_t *w1Param = tensorInitFloat(w1Data, w1Dims, 2, NULL); + float w1GradData[8] = {0}; + tensor_t *w1Grad = tensorInitFloat(w1GradData, w1Dims, 2, NULL); + parameter_t *w1 = parameterInit(w1Param, w1Grad); + + float b1Data[] = {0.0f, 0.0f}; + size_t b1Dims[] = {1, 2}; + tensor_t *b1Param = tensorInitFloat(b1Data, b1Dims, 2, NULL); + float b1GradData[] = {0.0f, 0.0f}; + tensor_t *b1Grad = tensorInitFloat(b1GradData, b1Dims, 2, NULL); + parameter_t *b1 = parameterInit(b1Param, b1Grad); + + layer_t *linear1 = linearLayerInit(w1, b1, &testQ, &testQ, &testQ, &testQ); + layer_t *softmax = softmaxLayerInit(&testQ, &testQ); + + layer_t *model[] = {linear0, relu, linear1, softmax}; + size_t sizeModel = 4; + + optimizer_t *sgd = sgdMCreateOptim(0.01f, 0.f, 0.f, model, sizeModel, FLOAT32); + optimizerFunctions_t sgdFns = optimizerFunctions[SGD_M]; + + float inputData[] = {1.0f, 2.0f, 3.0f}; + size_t inputDims[] = {1, 3}; + tensor_t *input = tensorInitFloat(inputData, inputDims, 2, NULL); + + float labelData[] = {1.0f, 0.0f}; + size_t labelDims[] = {1, 2}; + tensor_t *label = tensorInitFloat(labelData, labelDims, 2, NULL); + + // Run 3 training steps + for (size_t step = 0; step < 3; step++) { + trainingStats_t *stats = calculateGradsSequential(model, sizeModel, CROSS_ENTROPY, + input, label); + TEST_ASSERT_NOT_NULL(stats); + TEST_ASSERT_TRUE(stats->loss >= 0.0f); + freeTrainingStats(stats); + + sgdFns.step(sgd); + sgdFns.zero(sgd); + } +} + +int main(void) { + UNITY_BEGIN(); + RUN_TEST(testMultiLayerBackward_WithCrossEntropy_DoesNotCrash); + RUN_TEST(testMultiLayerBackward_WithManualInit_DoesNotCrash); + RUN_TEST(testMultiLayerTraining_MultipleSteps_GradsAccumulate); + return UNITY_END(); +}